diff options
286 files changed, 5487 insertions, 2374 deletions
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index 093cdaefdb37..d8b101c97031 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_{}_bit() operations, + otherwise the above rules apply. In the case of test_and_set_bit_lock(), if the bit in memory is unchanged by the operation then it is deemed to have failed. diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt index e6cb2291471c..7ae8aa148634 100644 --- a/Documentation/devicetree/bindings/net/qcom-emac.txt +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt @@ -14,7 +14,7 @@ MAC node: - mac-address : The 6-byte MAC address. If present, it is the default MAC address. - internal-phy : phandle to the internal PHY node -- phy-handle : phandle the external PHY node +- phy-handle : phandle to the external PHY node Internal PHY node: - compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii". diff --git a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml index b539781e39aa..835b53302db8 100644 --- a/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml @@ -47,12 +47,6 @@ properties: description: Properties for single LDO regulator. - properties: - regulator-name: - pattern: "^LDO[1-5]$" - description: - should be "LDO1", ..., "LDO5" - unevaluatedProperties: false "^BUCK[1-6]$": @@ -62,11 +56,6 @@ properties: Properties for single BUCK regulator. properties: - regulator-name: - pattern: "^BUCK[1-6]$" - description: - should be "BUCK1", ..., "BUCK6" - nxp,dvs-run-voltage: $ref: "/schemas/types.yaml#/definitions/uint32" minimum: 600000 diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor-peripheral-props.yaml index 553601a441a7..510b82c177c0 100644 --- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor-peripheral-props.yaml @@ -10,7 +10,7 @@ description: See spi-peripheral-props.yaml for more info. maintainers: - - Pratyush Yadav <[email protected]> + - Vaishnav Achath <[email protected]> properties: # cdns,qspi-nor.yaml diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml index 0a537fa3a641..4707294d8f59 100644 --- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml +++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Cadence Quad SPI controller maintainers: - - Pratyush Yadav <[email protected]> + - Vaishnav Achath <[email protected]> allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index ce048e782e80..a4abe1588005 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -16,7 +16,7 @@ description: their own separate schema that should be referenced from here. maintainers: - - Pratyush Yadav <[email protected]> + - Mark Brown <[email protected]> properties: reg: diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-thermal.yaml index 00dcbdd36144..119998d10ff4 100644 --- a/Documentation/devicetree/bindings/thermal/rcar-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.yaml @@ -42,7 +42,7 @@ properties: description: Address ranges of the thermal registers. If more then one range is given the first one must be the common registers followed by each sensor - according the datasheet. + according to the datasheet. minItems: 1 maxItems: 4 diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst index 7fb398649f51..858ed5d80def 100644 --- a/Documentation/kbuild/kconfig-language.rst +++ b/Documentation/kbuild/kconfig-language.rst @@ -525,8 +525,8 @@ followed by a test macro:: If you need to expose a compiler capability to makefiles and/or C source files, `CC_HAS_` is the recommended prefix for the config option:: - config CC_HAS_ASM_GOTO - def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) + config CC_HAS_FOO + def_bool $(success,$(srctree)/scripts/cc-check-foo.sh $(CC)) Build as module only ~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/tools/rtla/rtla-timerlat-hist.rst b/Documentation/tools/rtla/rtla-timerlat-hist.rst index e12eae1f3301..6bf7f0ca4556 100644 --- a/Documentation/tools/rtla/rtla-timerlat-hist.rst +++ b/Documentation/tools/rtla/rtla-timerlat-hist.rst @@ -33,7 +33,7 @@ EXAMPLE ======= In the example below, **rtla timerlat hist** is set to run for *10* minutes, in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat -hist** will change the priority of the *timelat* threads to run under +hist** will change the priority of the *timerlat* threads to run under *SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The *1ms* period is also passed to the *timerlat* tracer:: diff --git a/MAINTAINERS b/MAINTAINERS index 8a5012ba6ff9..9d7f64dc0efe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2178,7 +2178,7 @@ M: Jean-Marie Verdun <[email protected]> M: Nick Hawkins <[email protected]> S: Maintained F: Documentation/devicetree/bindings/arm/hpe,gxp.yaml -F: Documentation/devicetree/bindings/spi/hpe,gxp-spi.yaml +F: Documentation/devicetree/bindings/spi/hpe,gxp-spifi.yaml F: Documentation/devicetree/bindings/timer/hpe,gxp-timer.yaml F: arch/arm/boot/dts/hpe-bmc* F: arch/arm/boot/dts/hpe-gxp* @@ -5145,6 +5145,7 @@ T: git git://git.samba.org/sfrench/cifs-2.6.git F: Documentation/admin-guide/cifs/ F: fs/cifs/ F: fs/smbfs_common/ +F: include/uapi/linux/cifs COMPACTPCI HOTPLUG CORE M: Scott Murray <[email protected]> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* @@ -1113,13 +1113,11 @@ vmlinux-alldirs := $(sort $(vmlinux-dirs) Documentation \ $(patsubst %/,%,$(filter %/, $(core-) \ $(drivers-) $(libs-)))) -subdir-modorder := $(addsuffix modules.order,$(filter %/, \ - $(core-y) $(core-m) $(libs-y) $(libs-m) \ - $(drivers-y) $(drivers-m))) - build-dirs := $(vmlinux-dirs) clean-dirs := $(vmlinux-alldirs) +subdir-modorder := $(addsuffix /modules.order, $(build-dirs)) + # Externally visible symbols (used by link-vmlinux.sh) KBUILD_VMLINUX_OBJS := $(head-y) $(patsubst %/,%/built-in.a, $(core-y)) KBUILD_VMLINUX_OBJS += $(addsuffix built-in.a, $(filter %/, $(libs-y))) diff --git a/arch/Kconfig b/arch/Kconfig index f330410da63a..5dbf11a5ba4e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -53,7 +53,6 @@ config KPROBES config JUMP_LABEL bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL - depends on CC_HAS_ASM_GOTO select OBJTOOL if HAVE_JUMP_LABEL_HACK help This option enables a transparent branch optimization that @@ -1361,7 +1360,7 @@ config HAVE_PREEMPT_DYNAMIC_CALL config HAVE_PREEMPT_DYNAMIC_KEY bool - depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO + depends on HAVE_ARCH_JUMP_LABEL select HAVE_PREEMPT_DYNAMIC help An architecture should select this if it can handle the preemption diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f38ef299f13b..e9c9388ccc02 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -929,6 +929,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) +#define kvm_supports_32bit_el0() \ + (system_supports_32bit_el0() && \ + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874..316917b98707 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -75,9 +75,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 986cee6fbc7f..2ff0ef62abad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -757,8 +757,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) if (likely(!vcpu_mode_is_32bit(vcpu))) return false; - return !system_supports_32bit_el0() || - static_branch_unlikely(&arm64_mismatched_32bit_el0); + return !kvm_supports_32bit_el0(); } /** diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8c607199cad1..f802a3b3f8db 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) return -EINVAL; break; case PSR_AA32_MODE_FIQ: diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 87f1cd0df36e..c9a13e487187 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -993,7 +993,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, * THP doesn't start to split while we are adjusting the * refcounts. * - * We are sure this doesn't happen, because mmu_notifier_retry + * We are sure this doesn't happen, because mmu_invalidate_retry * was successful and we are holding the mmu_lock, so if this * THP is trying to split, it will be blocked in the mmu * notifier before touching any of the pages, specifically @@ -1188,9 +1188,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq happens before we call + * Ensure the read of mmu_invalidate_seq happens before we call * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk * the page we just got a reference to gets unmapped before we have a * chance to grab the mmu_lock, which ensure that if the page gets @@ -1246,7 +1246,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c059b259aea6..3234f50b8c4b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -652,7 +652,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -701,7 +701,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index 4b130199ceae..d06d4542b634 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -81,7 +81,6 @@ extern struct acpi_vector_group msi_group[MAX_IO_PICS]; #define GSI_MIN_PCH_IRQ LOONGSON_PCH_IRQ_BASE #define GSI_MAX_PCH_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) -extern int find_pch_pic(u32 gsi); struct acpi_madt_lio_pic; struct acpi_madt_eio_pic; struct acpi_madt_ht_pic; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 717716cc51c5..5cedb28e8a40 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -84,8 +84,6 @@ #define KVM_MAX_VCPUS 16 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index db17e870bdff..74cd64a24d05 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -615,17 +615,17 @@ retry: * Used to check for invalidations in progress, of the pfn that is * returned by pfn_to_pfn_prot below. */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't * risk the page we get a reference to getting unmapped before we have a - * chance to grab the mmu_lock without mmu_notifier_retry() noticing. + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. * * This smp_rmb() pairs with the effective smp_wmb() of the combination * of the pte_unmap_unlock() after the PTE is zapped, and the * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before - * mmu_notifier_seq is incremented. + * mmu_invalidate_seq is incremented. */ smp_rmb(); @@ -638,7 +638,7 @@ retry: spin_lock(&kvm->mmu_lock); /* Check if an invalidation has taken place since we got pfn */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by diff --git a/arch/nios2/include/asm/entry.h b/arch/nios2/include/asm/entry.h index cf37f55efbc2..bafb7b2ca59f 100644 --- a/arch/nios2/include/asm/entry.h +++ b/arch/nios2/include/asm/entry.h @@ -50,7 +50,8 @@ stw r13, PT_R13(sp) stw r14, PT_R14(sp) stw r15, PT_R15(sp) - stw r2, PT_ORIG_R2(sp) + movi r24, -1 + stw r24, PT_ORIG_R2(sp) stw r7, PT_ORIG_R7(sp) stw ra, PT_RA(sp) diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h index 642462144872..9da34c3022a2 100644 --- a/arch/nios2/include/asm/ptrace.h +++ b/arch/nios2/include/asm/ptrace.h @@ -74,6 +74,8 @@ extern void show_regs(struct pt_regs *); ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE)\ - 1) +#define force_successful_syscall_return() (current_pt_regs()->orig_r2 = -1) + int do_syscall_trace_enter(void); void do_syscall_trace_exit(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 0794cd7803df..99f0a65e6234 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -185,6 +185,7 @@ ENTRY(handle_system_call) ldw r5, PT_R5(sp) local_restart: + stw r2, PT_ORIG_R2(sp) /* Check that the requested system call is within limits */ movui r1, __NR_syscalls bgeu r2, r1, ret_invsyscall @@ -192,7 +193,6 @@ local_restart: movhi r11, %hiadj(sys_call_table) add r1, r1, r11 ldw r1, %lo(sys_call_table)(r1) - beq r1, r0, ret_invsyscall /* Check if we are being traced */ GET_THREAD_INFO r11 @@ -213,6 +213,9 @@ local_restart: translate_rc_and_ret: movi r1, 0 bge r2, zero, 3f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 3f sub r2, zero, r2 movi r1, 1 3: @@ -255,9 +258,9 @@ traced_system_call: ldw r6, PT_R6(sp) ldw r7, PT_R7(sp) - /* Fetch the syscall function, we don't need to check the boundaries - * since this is already done. - */ + /* Fetch the syscall function. */ + movui r1, __NR_syscalls + bgeu r2, r1, traced_invsyscall slli r1, r2, 2 movhi r11,%hiadj(sys_call_table) add r1, r1, r11 @@ -276,6 +279,9 @@ traced_system_call: translate_rc_and_ret2: movi r1, 0 bge r2, zero, 4f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 4f sub r2, zero, r2 movi r1, 1 4: @@ -287,6 +293,11 @@ end_translate_rc_and_ret2: RESTORE_SWITCH_STACK br ret_from_exception + /* If the syscall number was invalid return ENOSYS */ +traced_invsyscall: + movi r2, -ENOSYS + br translate_rc_and_ret2 + Luser_return: GET_THREAD_INFO r11 /* get thread_info pointer */ ldw r10, TI_FLAGS(r11) /* get thread_info->flags */ @@ -336,9 +347,6 @@ external_interrupt: /* skip if no interrupt is pending */ beq r12, r0, ret_from_interrupt - movi r24, -1 - stw r24, PT_ORIG_R2(sp) - /* * Process an external hardware interrupt. */ diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index cb0b91589cf2..a5b93a30c6eb 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -242,7 +242,7 @@ static int do_signal(struct pt_regs *regs) /* * If we were from a system call, check for system call restarting... */ - if (regs->orig_r2 >= 0) { + if (regs->orig_r2 >= 0 && regs->r1) { continue_addr = regs->ea; restart_addr = continue_addr - 4; retval = regs->r2; @@ -264,6 +264,7 @@ static int do_signal(struct pt_regs *regs) regs->ea = restart_addr; break; } + regs->orig_r2 = -1; } if (get_signal(&ksig)) { diff --git a/arch/nios2/kernel/syscall_table.c b/arch/nios2/kernel/syscall_table.c index 6176d63023c1..c2875a6dd5a4 100644 --- a/arch/nios2/kernel/syscall_table.c +++ b/arch/nios2/kernel/syscall_table.c @@ -13,5 +13,6 @@ #define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, #include <asm/unistd.h> }; diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4def2bd17b9b..d49065af08e9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -666,7 +666,7 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, VM_WARN(!spin_is_locked(&kvm->mmu_lock), "%s called with kvm mmu_lock not held \n", __func__); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) return NULL; pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bdd3332200c5..31de91c8359c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -68,10 +68,6 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) pci_dma_ops = dma_ops; } -/* - * This function should run under locking protection, specifically - * hose_spinlock. - */ static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; @@ -108,15 +104,20 @@ static int get_phb_number(struct device_node *dn) if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); + spin_lock(&hose_spinlock); + /* We need to be sure to not use the same PHB number twice. */ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) - return phb_id; + goto out_unlock; /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); +out_unlock: + spin_unlock(&hose_spinlock); + return phb_id; } @@ -127,10 +128,13 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; - spin_lock(&hose_spinlock); + phb->global_number = get_phb_number(dev); + + spin_lock(&hose_spinlock); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); + phb->dn = dev; phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 1ae09992c9ea..bc6a381b5346 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -90,7 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, unsigned long pfn; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Get host physical address for gpa */ @@ -151,7 +151,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, cpte = kvmppc_mmu_hpte_cache_next(vcpu); spin_lock(&kvm->mmu_lock); - if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { + if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) { r = -EAGAIN; goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 514fd45c1994..e9744b41a226 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -578,7 +578,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, return -EFAULT; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); ret = -EFAULT; @@ -693,7 +693,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, /* Check if we might have been invalidated; let the guest retry if so */ ret = RESUME_GUEST; - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { unlock_rmap(rmap); goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 9d4b3feda3b6..5d5e12f3bf86 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -640,7 +640,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Check if we might have been invalidated; let the guest retry if so */ spin_lock(&kvm->mmu_lock); ret = -EAGAIN; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* Now traverse again under the lock and change the tree */ @@ -830,7 +830,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, bool large_enable; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -1191,7 +1191,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, * Increase the mmu notifier sequence number to prevent any page * fault that read the memslot earlier from writing a PTE. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index be8249cc6107..5a64a1341e6f 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1580,7 +1580,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, /* 2. Find the host pte for this L1 guest real address */ /* Used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* See if can find translation in our partition scoped tables for L1 */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2257fb18cb72..5a05953ae13f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -219,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, g_ptel = ptel; /* used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Find the memslot (if any) for this address */ @@ -366,7 +366,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -932,7 +932,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, int i; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); @@ -960,7 +960,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, long ret = H_SUCCESS; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 7f16afc331ef..05668e964140 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -339,7 +339,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long flags; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -460,7 +460,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; goto out; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 553d755483ed..3b5583db9d80 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -28,7 +28,7 @@ unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; -__ro_after_init DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); +DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); EXPORT_SYMBOL(riscv_isa_ext_keys); /** diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 3a35b2d95697..3620ecac2fa1 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -666,7 +666,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return ret; } - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { @@ -686,7 +686,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; if (writable) { diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index f0bc4dc3e9bf..6511d15ace45 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -437,7 +437,7 @@ __init int hypfs_diag_init(void) int rc; if (diag204_probe()) { - pr_err("The hardware system does not support hypfs\n"); + pr_info("The hardware system does not support hypfs\n"); return -ENODATA; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5c97f48cea91..ee919bfc8186 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -496,9 +496,9 @@ fail_hypfs_sprp_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); fail_dbfs_exit: hypfs_dbfs_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } device_initcall(hypfs_init) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 79e38afd4b91..e719af8bdf56 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1011,7 +1011,7 @@ error_kzalloc: static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 19cd7ed6ec3c..4b6d1b526bc1 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -65,20 +65,6 @@ extern void setup_clear_cpu_cap(unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use @@ -137,7 +123,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7854685c5f25..bafbd905e6e7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -286,10 +286,6 @@ vdso_install: archprepare: checkbin checkbin: -ifndef CONFIG_CC_HAS_ASM_GOTO - @echo Compiler lacks asm-goto support. - @exit 1 -endif ifdef CONFIG_RETPOLINE ifeq ($(RETPOLINE_CFLAGS),) @echo "You are building kernel with non-retpoline compiler." >&2 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea34cc31b047..1a85e1fb0922 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -155,20 +155,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use @@ -208,7 +194,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h index 503622627400..991e31cfde94 100644 --- a/arch/x86/include/asm/extable_fixup_types.h +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -64,4 +64,6 @@ #define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) #define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) +#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */ + #endif diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 689880eca9ba..9b08082a5d9f 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -31,6 +31,16 @@ #define __noendbr __attribute__((nocf_check)) +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_ibt_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + static inline __attribute_const__ u32 gen_endbr(void) { u32 endbr; @@ -84,6 +94,7 @@ extern __noendbr void ibt_restore(u64 save); #ifndef __ASSEMBLY__ #define ASM_ENDBR +#define IBT_NOSEAL(name) #define __noendbr diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ffa578cafe1..2c96c43c313a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -53,7 +53,7 @@ #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8a9eba191516..7fa611216417 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -11,7 +11,7 @@ #define __CLOBBERS_MEM(clb...) "memory", ## clb -#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO) +#ifndef __GCC_ASM_FLAG_OUTPUTS__ /* Use asm goto */ @@ -27,7 +27,7 @@ cc_label: c = true; \ c; \ }) -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ /* Use flags output or a set instruction */ @@ -40,7 +40,7 @@ cc_label: c = true; \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 8338b0432b50..46b4f1f7f354 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -77,58 +77,18 @@ static inline unsigned long find_zero(unsigned long mask) * and the next page not being mapped, take the exception and * return zeroes in the non-existing part. */ -#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long offset, data; unsigned long ret; - asm_volatile_goto( + asm volatile( "1: mov %[mem], %[ret]\n" - - _ASM_EXTABLE(1b, %l[do_exception]) - - : [ret] "=r" (ret) - : [mem] "m" (*(unsigned long *)addr) - : : do_exception); - - return ret; - -do_exception: - offset = (unsigned long)addr & (sizeof(long) - 1); - addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); - data = *(unsigned long *)addr; - ret = data >> offset * 8; - - return ret; -} - -#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - -static inline unsigned long load_unaligned_zeropad(const void *addr) -{ - unsigned long offset, data; - unsigned long ret, err = 0; - - asm( "1: mov %[mem], %[ret]\n" "2:\n" - - _ASM_EXTABLE_FAULT(1b, 2b) - - : [ret] "=&r" (ret), "+a" (err) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD) + : [ret] "=r" (ret) : [mem] "m" (*(unsigned long *)addr)); - if (unlikely(err)) { - offset = (unsigned long)addr & (sizeof(long) - 1); - addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1)); - data = *(unsigned long *)addr; - ret = data >> offset * 8; - } - return ret; } -#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ - #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 74167dc5f55e..4c3c27b6aea3 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (p->ainsn.jcc.type >= 0xe) - match = match && (regs->flags & X86_EFLAGS_ZF); + match = match || (regs->flags & X86_EFLAGS_ZF); } __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4eeb7c75dfa..d5ec3a2ed5a4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -326,7 +326,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ name ":\n\t" \ - ASM_ENDBR + ASM_ENDBR \ + IBT_NOSEAL(name) #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -446,27 +447,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); FOP_END /* Special case for SETcc - 1 instruction per cc */ - -/* - * Depending on .config the SETcc functions look like: - * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] - * INT3 [1 byte; CONFIG_SLS] - */ -#define SETCC_ALIGN 16 - #define FOP_SETCC(op) \ - ".align " __stringify(SETCC_ALIGN) " \n\t" \ - ".type " #op ", @function \n\t" \ - #op ": \n\t" \ - ASM_ENDBR \ + FOP_FUNC(op) \ #op " %al \n\t" \ - __FOP_RET(#op) \ - ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + FOP_RET(op) -__FOP_START(setcc, SETCC_ALIGN) +FOP_START(setcc) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) @@ -493,7 +479,7 @@ FOP_END; /* * XXX: inoutclob user must know where the argument is being expanded. - * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault. + * Using asm goto would allow us to remove _fault. */ #define asm_safe(insn, inoutclob...) \ ({ \ @@ -1079,7 +1065,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; - void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); + void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index eccddb136954..126fa9aec64c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2914,7 +2914,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -2928,7 +2928,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * * There are several ways to safely use this helper: * - * - Check mmu_notifier_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -3056,7 +3056,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return; /* - * mmu_notifier_retry() was successful and mmu_lock is held, so + * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; @@ -4203,7 +4203,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -4227,7 +4227,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); @@ -6055,7 +6055,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6069,7 +6069,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); write_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f5958071220c..39e0205e7300 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -589,7 +589,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) @@ -838,7 +838,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 331310c29349..60814e110a54 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -41,6 +41,59 @@ static bool ex_handler_default(const struct exception_table_entry *e, return true; } +/* + * This is the *very* rare case where we do a "load_unaligned_zeropad()" + * and it's a page crosser into a non-existent page. + * + * This happens when we optimistically load a pathname a word-at-a-time + * and the name is less than the full word and the next page is not + * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC. + * + * NOTE! The faulting address is always a 'mov mem,reg' type instruction + * of size 'long', and the exception fixup must always point to right + * after the instruction. + */ +static bool ex_handler_zeropad(const struct exception_table_entry *e, + struct pt_regs *regs, + unsigned long fault_addr) +{ + struct insn insn; + const unsigned long mask = sizeof(long) - 1; + unsigned long offset, addr, next_ip, len; + unsigned long *reg; + + next_ip = ex_fixup_addr(e); + len = next_ip - regs->ip; + if (len > MAX_INSN_SIZE) + return false; + + if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN)) + return false; + if (insn.length != len) + return false; + + if (insn.opcode.bytes[0] != 0x8b) + return false; + if (insn.opnd_bytes != sizeof(long)) + return false; + + addr = (unsigned long) insn_get_addr_ref(&insn, regs); + if (addr == ~0ul) + return false; + + offset = addr & mask; + addr = addr & ~mask; + if (fault_addr != addr + sizeof(long)) + return false; + + reg = insn_get_modrm_reg_ptr(&insn, regs); + if (!reg) + return false; + + *reg = *(unsigned long *)addr >> (offset * 8); + return ex_handler_default(e, regs); +} + static bool ex_handler_fault(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { @@ -217,6 +270,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, return ex_handler_sgx(e, regs, trapnr); case EX_TYPE_UCOPY_LEN: return ex_handler_ucopy_len(e, regs, trapnr, reg, imm); + case EX_TYPE_ZEROPAD: + return ex_handler_zeropad(e, regs, fault_addr); } BUG(); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 39c5246964a9..0fe690ebc269 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -645,7 +645,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, pages++; spin_lock(&init_mm.page_table_lock); - prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE); + prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); set_pte_init((pte_t *)pud, pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, diff --git a/block/blk-mq.c b/block/blk-mq.c index 5ee62b95f3e5..3c1e6b6d991d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2229,26 +2229,6 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); -/** - * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped - * @q: request queue. - * - * The caller is responsible for serializing this function against - * blk_mq_{start,stop}_hw_queue(). - */ -bool blk_mq_queue_stopped(struct request_queue *q) -{ - struct blk_mq_hw_ctx *hctx; - unsigned long i; - - queue_for_each_hw_ctx(q, hctx, i) - if (blk_mq_hctx_stopped(hctx)) - return true; - - return false; -} -EXPORT_SYMBOL(blk_mq_queue_stopped); - /* * This function is often used for pausing .queue_rq() by driver when * there isn't enough resource or some conditions aren't satisfied, and @@ -2570,7 +2550,7 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule) break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false, last); + blk_mq_request_bypass_insert(rq, false, true); blk_mq_commit_rqs(hctx, &queued, from_schedule); return; default: diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index ef4508d72c02..7c128c89b454 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2122,6 +2122,7 @@ const char *ata_get_cmd_name(u8 command) { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, + { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" }, { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2b7d1db5c4a7..6a4a94b4cdf4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -555,7 +555,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu; } -static bool ubq_daemon_is_dying(struct ublk_queue *ubq) +static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) { return ubq->ubq_daemon->flags & PF_EXITING; } @@ -605,8 +605,9 @@ static void ublk_complete_rq(struct request *req) } /* - * __ublk_fail_req() may be called from abort context or ->ubq_daemon - * context during exiting, so lock is required. + * Since __ublk_rq_task_work always fails requests immediately during + * exiting, __ublk_fail_req() is only called from abort context during + * exiting. So lock is unnecessary. * * Also aborting may not be started yet, keep in mind that one failed * request may be issued by block layer again. @@ -644,8 +645,7 @@ static inline void __ublk_rq_task_work(struct request *req) struct ublk_device *ub = ubq->dev; int tag = req->tag; struct ublk_io *io = &ubq->ios[tag]; - bool task_exiting = current != ubq->ubq_daemon || - (current->flags & PF_EXITING); + bool task_exiting = current != ubq->ubq_daemon || ubq_daemon_is_dying(ubq); unsigned int mapped_bytes; pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n", @@ -680,6 +680,11 @@ static inline void __ublk_rq_task_work(struct request *req) * do the copy work. */ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA; + /* update iod->addr because ublksrv may have passed a new io buffer */ + ublk_get_iod(ubq, req->tag)->addr = io->addr; + pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n", + __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags, + ublk_get_iod(ubq, req->tag)->addr); } mapped_bytes = ublk_map_io(ubq, req, io); @@ -751,9 +756,25 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) goto fail; } else { - struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_io *io = &ubq->ios[rq->tag]; + struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + /* + * If the check pass, we know that this is a re-issued request aborted + * previously in monitor_work because the ubq_daemon(cmd's task) is + * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore + * because this ioucmd's io_uring context may be freed now if no inflight + * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work. + * + * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing + * the tag). Then the request is re-started(allocating the tag) and we are here. + * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED + * guarantees that here is a re-issued request aborted previously. + */ + if ((io->flags & UBLK_IO_FLAG_ABORTED)) + goto fail; + pdu->req = rq; io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 86d670c71286..ad068865ba20 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev, } EXPORT_SYMBOL(drm_gem_private_object_init); -static void -drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp) -{ - /* - * Note: obj->dma_buf can't disappear as long as we still hold a - * handle reference in obj->handle_count. - */ - mutex_lock(&filp->prime.lock); - if (obj->dma_buf) { - drm_prime_remove_buf_handle_locked(&filp->prime, - obj->dma_buf); - } - mutex_unlock(&filp->prime.lock); -} - /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) if (obj->funcs->close) obj->funcs->close(obj, file_priv); - drm_gem_remove_prime_handles(obj, file_priv); + drm_prime_remove_buf_handle(&file_priv->prime, id); drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 1fbbc19f1ac0..7bb98e6a446d 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv); void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf); +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle); /* drm_drv.c */ struct drm_minor *drm_minor_acquire(unsigned int minor_id); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index a3f180653b8b..eb09e86044c6 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri return -ENOENT; } -void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, - struct dma_buf *dma_buf) +void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, + uint32_t handle) { struct rb_node *rb; - rb = prime_fpriv->dmabufs.rb_node; + mutex_lock(&prime_fpriv->lock); + + rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; - member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); - if (member->dma_buf == dma_buf) { + member = rb_entry(rb, struct drm_prime_member, handle_rb); + if (member->handle == handle) { rb_erase(&member->handle_rb, &prime_fpriv->handles); rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); - dma_buf_put(dma_buf); + dma_buf_put(member->dma_buf); kfree(member); - return; - } else if (member->dma_buf < dma_buf) { + break; + } else if (member->handle < handle) { rb = rb->rb_right; } else { rb = rb->rb_left; } } + + mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 05076e530e7d..e29175e4b44c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, if (ret == 0) { ret = nouveau_fence_new(chan, false, &fence); if (ret == 0) { + /* TODO: figure out a better solution here + * + * wait on the fence here explicitly as going through + * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. + * + * Without this the operation can timeout and we'll fallback to a + * software copy, which might take several minutes to finish. + */ + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 061be9a6619d..b0f3117102ca 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -8,6 +8,7 @@ config DRM_VC4 depends on DRM depends on SND && SND_SOC depends on COMMON_CLK + depends on PM select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 592c3b5d03e6..1e5f68704d7d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -2855,7 +2855,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } -static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev) +static int vc4_hdmi_runtime_suspend(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); @@ -2972,17 +2972,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_4kp60 = true; } + pm_runtime_enable(dev); + /* - * We need to have the device powered up at this point to call - * our reset hook and for the CEC init. + * We need to have the device powered up at this point to call + * our reset hook and for the CEC init. */ - ret = vc4_hdmi_runtime_resume(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) - goto err_put_ddc; - - pm_runtime_get_noresume(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); + goto err_disable_runtime_pm; if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") || of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) && @@ -3028,6 +3026,7 @@ err_destroy_conn: err_destroy_encoder: drm_encoder_cleanup(encoder); pm_runtime_put_sync(dev); +err_disable_runtime_pm: pm_runtime_disable(dev); err_put_ddc: put_device(&vc4_hdmi->ddc->dev); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 78fb1a4274a6..e47fa3465671 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1572,9 +1572,7 @@ static int i2c_imx_remove(struct platform_device *pdev) struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); int irq, ret; - ret = pm_runtime_resume_and_get(&pdev->dev); - if (ret < 0) - return ret; + ret = pm_runtime_get_sync(&pdev->dev); hrtimer_cancel(&i2c_imx->slave_timer); @@ -1585,17 +1583,21 @@ static int i2c_imx_remove(struct platform_device *pdev) if (i2c_imx->dma) i2c_imx_dma_free(i2c_imx); - /* setup chip registers to defaults */ - imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IADR); - imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IFDR); - imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2CR); - imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); + if (ret == 0) { + /* setup chip registers to defaults */ + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IADR); + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IFDR); + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2CR); + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); + clk_disable(i2c_imx->clk); + } clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); irq = platform_get_irq(pdev, 0); if (irq >= 0) free_irq(irq, i2c_imx); - clk_disable_unprepare(i2c_imx->clk); + + clk_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index 79798fc7462a..6746aa46d96c 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -30,7 +30,7 @@ struct acpi_smbus_cmi { u8 cap_info:1; u8 cap_read:1; u8 cap_write:1; - const struct smbus_methods_t *methods; + struct smbus_methods_t *methods; }; static const struct smbus_methods_t smbus_methods = { @@ -361,6 +361,7 @@ static acpi_status acpi_smbus_cmi_query_methods(acpi_handle handle, u32 level, static int acpi_smbus_cmi_add(struct acpi_device *device) { struct acpi_smbus_cmi *smbus_cmi; + const struct acpi_device_id *id; int ret; smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL); @@ -368,7 +369,6 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) return -ENOMEM; smbus_cmi->handle = device->handle; - smbus_cmi->methods = device_get_match_data(&device->dev); strcpy(acpi_device_name(device), ACPI_SMBUS_HC_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_SMBUS_HC_CLASS); device->driver_data = smbus_cmi; @@ -376,6 +376,11 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->cap_read = 0; smbus_cmi->cap_write = 0; + for (id = acpi_smbus_cmi_ids; id->id[0]; id++) + if (!strcmp(id->id, acpi_device_hid(device))) + smbus_cmi->methods = + (struct smbus_methods_t *) id->driver_data; + acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index fce80a4a5147..04c04e6d24c3 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -18,6 +18,7 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) struct scatterlist *sg; unsigned long start, end, cur = 0; unsigned int nmap = 0; + long ret; int i; dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); @@ -67,9 +68,14 @@ wait_fence: * may be not up-to-date. Wait for the exporter to finish * the migration. */ - return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; + return 0; } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c16017f6e8db..14392c942f49 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2468,31 +2468,24 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, opt2 |= CCTRL_ECN_V(1); } - skb_get(skb); - rpl = cplhdr(skb); if (!is_t4(adapter_type)) { - BUILD_BUG_ON(sizeof(*rpl5) != roundup(sizeof(*rpl5), 16)); - skb_trim(skb, sizeof(*rpl5)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); - - if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; + + skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL); + rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16)); + rpl = (void *)rpl5; + INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid); opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); opt2 |= T5_ISS_F; - rpl5 = (void *)rpl; - memset_after(rpl5, 0, iss); if (peer2peer) isn += 4; rpl5->iss = cpu_to_be32(isn); pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); + } else { + skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); + rpl = __skb_put_zero(skb, sizeof(*rpl)); + INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid); } rpl->opt0 = cpu_to_be64(opt0); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 72f08171a28a..bc3ec22a62c5 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -407,7 +407,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, to_erdma_access_flags(reg_wr(send_wr)->access); regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); regmr_sge->length = cpu_to_le32(mr->ibmr.length); - regmr_sge->stag = cpu_to_le32(mr->ibmr.lkey); + regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) | FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index a7a3d42e2016..699bd3f59cd3 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -280,7 +280,7 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->vendor_id = PCI_VENDOR_ID_ALIBABA; attr->vendor_part_id = dev->pdev->device; attr->hw_ver = dev->pdev->revision; - attr->max_qp = dev->attrs.max_qp; + attr->max_qp = dev->attrs.max_qp - 1; attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr); attr->max_qp_rd_atom = dev->attrs.max_ord; attr->max_qp_init_rd_atom = dev->attrs.max_ird; @@ -291,7 +291,7 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_send_sge = dev->attrs.max_send_sge; attr->max_recv_sge = dev->attrs.max_recv_sge; attr->max_sge_rd = dev->attrs.max_sge_rd; - attr->max_cq = dev->attrs.max_cq; + attr->max_cq = dev->attrs.max_cq - 1; attr->max_cqe = dev->attrs.max_cqe; attr->max_mr = dev->attrs.max_mr; attr->max_pd = dev->attrs.max_pd; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a174a0eee8dc..fc94a1b25485 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2738,26 +2738,24 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { - dev->port_caps[port - 1].has_smi = false; - if (MLX5_CAP_GEN(dev->mdev, port_type) == - MLX5_CAP_PORT_TYPE_IB) { - if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { - err = mlx5_query_hca_vport_context(dev->mdev, 0, - port, 0, - &vport_ctx); - if (err) { - mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", - port, err); - return err; - } - dev->port_caps[port - 1].has_smi = - vport_ctx.has_smi; - } else { - dev->port_caps[port - 1].has_smi = true; - } + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return 0; + + for (port = 1; port <= dev->num_ports; port++) { + if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) { + dev->port_caps[port - 1].has_smi = true; + continue; } + err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0, + &vport_ctx); + if (err) { + mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", + port, err); + return err; + } + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } + return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index bd5f3b5e1727..7b83f48f60c5 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -537,6 +537,7 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) struct iscsi_hdr *hdr; char *data; int length; + bool full_feature_phase; if (unlikely(wc->status != IB_WC_SUCCESS)) { iser_err_comp(wc, "login_rsp"); @@ -550,6 +551,9 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) hdr = desc->rsp + sizeof(struct iser_ctrl); data = desc->rsp + ISER_HEADERS_LEN; length = wc->byte_len - ISER_HEADERS_LEN; + full_feature_phase = ((hdr->flags & ISCSI_FULL_FEATURE_PHASE) == + ISCSI_FULL_FEATURE_PHASE) && + (hdr->flags & ISCSI_FLAG_CMD_FINAL); iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, length); @@ -560,7 +564,8 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) desc->rsp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - if (iser_conn->iscsi_conn->session->discovery_sess) + if (!full_feature_phase || + iser_conn->iscsi_conn->session->discovery_sess) return; /* Post the first RX buffer that is skipped in iser_post_rx_bufs() */ diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 51bd66a45a11..e190bb8c225c 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -68,7 +68,6 @@ static int hyperv_irq_remapping_alloc(struct irq_domain *domain, { struct irq_alloc_info *info = arg; struct irq_data *irq_data; - struct irq_desc *desc; int ret = 0; if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) @@ -90,8 +89,7 @@ static int hyperv_irq_remapping_alloc(struct irq_domain *domain, * Hypver-V IO APIC irq affinity should be in the scope of * ioapic_max_cpumask because no irq remapping support. */ - desc = irq_data_to_desc(irq_data); - cpumask_copy(desc->irq_common_data.affinity, &ioapic_max_cpumask); + irq_data_update_affinity(irq_data, &ioapic_max_cpumask); return 0; } diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 327f3ab62c03..741612ba6a52 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -129,7 +129,7 @@ static int __init cpuintc_acpi_init(union acpi_subtable_headers *header, clear_csr_ecfg(ECFG0_IM); clear_csr_estat(ESTATF_IP); - cpuintc_handle = irq_domain_alloc_fwnode(NULL); + cpuintc_handle = irq_domain_alloc_named_fwnode("CPUINTC"); irq_domain = irq_domain_create_linear(cpuintc_handle, EXCCODE_INT_NUM, &loongarch_cpu_intc_irq_domain_ops, NULL); diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 80d8ca6f2d46..16e9af8d8b1e 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -111,11 +111,15 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); /* Mask target vector */ - csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), 0x0, 0); + csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), + 0x0, priv->node * CORES_PER_EIO_NODE); + /* Set route for target vector */ eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); + /* Unmask target vector */ - csr_any_send(regaddr, EIOINTC_ALL_ENABLE, 0x0, 0); + csr_any_send(regaddr, EIOINTC_ALL_ENABLE, + 0x0, priv->node * CORES_PER_EIO_NODE); irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -286,7 +290,7 @@ static void acpi_set_vec_parent(int node, struct irq_domain *parent, struct acpi } } -struct irq_domain *acpi_get_vec_parent(int node, struct acpi_vector_group *vec_group) +static struct irq_domain *acpi_get_vec_parent(int node, struct acpi_vector_group *vec_group) { int i; @@ -344,7 +348,8 @@ int __init eiointc_acpi_init(struct irq_domain *parent, if (!priv) return -ENOMEM; - priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_eiointc); + priv->domain_handle = irq_domain_alloc_named_id_fwnode("EIOPIC", + acpi_eiointc->node); if (!priv->domain_handle) { pr_err("Unable to allocate domain handle\n"); goto out_free_priv; diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index c4f3c886ad61..0da8716f8f24 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -207,7 +207,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision, "reg-names", core_reg_names[i]); if (index < 0) - return -EINVAL; + goto out_iounmap; priv->core_isr[i] = of_iomap(node, index); } @@ -360,7 +360,7 @@ int __init liointc_acpi_init(struct irq_domain *parent, struct acpi_madt_lio_pic parent_irq[0] = irq_create_mapping(parent, acpi_liointc->cascade[0]); parent_irq[1] = irq_create_mapping(parent, acpi_liointc->cascade[1]); - domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_liointc); + domain_handle = irq_domain_alloc_fwnode(&acpi_liointc->address); if (!domain_handle) { pr_err("Unable to allocate domain handle\n"); return -ENOMEM; diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index d0e8551bebfa..a72ede90ffc6 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -282,7 +282,7 @@ int __init pch_msi_acpi_init(struct irq_domain *parent, int ret; struct fwnode_handle *domain_handle; - domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchmsi); + domain_handle = irq_domain_alloc_fwnode(&acpi_pchmsi->msg_address); ret = pch_msi_init(acpi_pchmsi->msg_address, acpi_pchmsi->start, acpi_pchmsi->count, parent, domain_handle); if (ret < 0) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index b6f1392964b1..c01b9c257005 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -48,25 +48,6 @@ static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -int find_pch_pic(u32 gsi) -{ - int i; - - /* Find the PCH_PIC that manages this GSI. */ - for (i = 0; i < MAX_IO_PICS; i++) { - struct pch_pic *priv = pch_pic_priv[i]; - - if (!priv) - return -1; - - if (gsi >= priv->gsi_base && gsi < (priv->gsi_base + priv->vec_count)) - return i; - } - - pr_err("ERROR: Unable to locate PCH_PIC for GSI %d\n", gsi); - return -1; -} - static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit) { u32 reg; @@ -325,6 +306,25 @@ IRQCHIP_DECLARE(pch_pic, "loongson,pch-pic-1.0", pch_pic_of_init); #endif #ifdef CONFIG_ACPI +int find_pch_pic(u32 gsi) +{ + int i; + + /* Find the PCH_PIC that manages this GSI. */ + for (i = 0; i < MAX_IO_PICS; i++) { + struct pch_pic *priv = pch_pic_priv[i]; + + if (!priv) + return -1; + + if (gsi >= priv->gsi_base && gsi < (priv->gsi_base + priv->vec_count)) + return i; + } + + pr_err("ERROR: Unable to locate PCH_PIC for GSI %d\n", gsi); + return -1; +} + static int __init pch_lpc_parse_madt(union acpi_subtable_headers *header, const unsigned long end) @@ -349,7 +349,7 @@ int __init pch_pic_acpi_init(struct irq_domain *parent, vec_base = acpi_pchpic->gsi_base - GSI_MIN_PCH_IRQ; - domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchpic); + domain_handle = irq_domain_alloc_fwnode(&acpi_pchpic->address); if (!domain_handle) { pr_err("Unable to allocate domain handle\n"); return -ENOMEM; diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 2f08d442e557..fc462995cf94 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1172,8 +1172,10 @@ static int meson_mmc_probe(struct platform_device *pdev) } ret = device_reset_optional(&pdev->dev); - if (ret) - return dev_err_probe(&pdev->dev, ret, "device reset failed\n"); + if (ret) { + dev_err_probe(&pdev->dev, ret, "device reset failed\n"); + goto free_host; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); host->regs = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 4ff73d1883de..69d78604d1fc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2446,6 +2446,9 @@ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery) /* disable busy check */ sdr_clr_bits(host->base + MSDC_PATCH_BIT1, MSDC_PB1_BUSY_CHECK_SEL); + val = readl(host->base + MSDC_INT); + writel(val, host->base + MSDC_INT); + if (recovery) { sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP, 1); @@ -2932,11 +2935,14 @@ static int __maybe_unused msdc_suspend(struct device *dev) struct mmc_host *mmc = dev_get_drvdata(dev); struct msdc_host *host = mmc_priv(mmc); int ret; + u32 val; if (mmc->caps2 & MMC_CAP2_CQE) { ret = cqhci_suspend(mmc); if (ret) return ret; + val = readl(host->base + MSDC_INT); + writel(val, host->base + MSDC_INT); } /* diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 0db9490dc659..e4003f6058eb 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -648,7 +648,7 @@ static int pxamci_probe(struct platform_device *pdev) ret = pxamci_of_init(pdev, mmc); if (ret) - return ret; + goto out; host = mmc_priv(mmc); host->mmc = mmc; @@ -672,7 +672,7 @@ static int pxamci_probe(struct platform_device *pdev) ret = pxamci_init_ocr(host); if (ret < 0) - return ret; + goto out; mmc->caps = 0; host->cmdat = 0; diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 4e904850973c..a7343d4bc50e 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -349,6 +349,15 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = { .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, }; +#ifdef CONFIG_ACPI +static const struct sdhci_pltfm_data sdhci_dwcmshc_bf3_pdata = { + .ops = &sdhci_dwcmshc_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_ACMD23_BROKEN, +}; +#endif + static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = { .ops = &sdhci_dwcmshc_rk35xx_ops, .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | @@ -431,7 +440,10 @@ MODULE_DEVICE_TABLE(of, sdhci_dwcmshc_dt_ids); #ifdef CONFIG_ACPI static const struct acpi_device_id sdhci_dwcmshc_acpi_ids[] = { - { .id = "MLNXBF30" }, + { + .id = "MLNXBF30", + .driver_data = (kernel_ulong_t)&sdhci_dwcmshc_bf3_pdata, + }, {} }; #endif @@ -447,7 +459,7 @@ static int dwcmshc_probe(struct platform_device *pdev) int err; u32 extra; - pltfm_data = of_device_get_match_data(&pdev->dev); + pltfm_data = device_get_match_data(&pdev->dev); if (!pltfm_data) { dev_err(&pdev->dev, "Error: No device match data found\n"); return -ENODEV; diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 4b14d80d27ed..e4f446db0ca1 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -613,6 +613,9 @@ int ksz9477_fdb_dump(struct ksz_device *dev, int port, goto exit; } + if (!(ksz_data & ALU_VALID)) + continue; + /* read ALU table */ ksz9477_read_table(dev, alu_table); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index a4c6eb9a52d0..83dca9179aa0 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -118,6 +118,9 @@ static int mv88e6060_setup_port(struct mv88e6060_priv *priv, int p) int addr = REG_PORT(p); int ret; + if (dsa_is_unused_port(priv->ds, p)) + return 0; + /* Do not force flow control, disable Ingress and Egress * Header tagging, disable VLAN tunneling, and set the port * state to Forwarding. Additionally, if this is the CPU diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index b4034b78c0ca..1cdce8a98d1d 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -274,27 +274,98 @@ static const u32 vsc9959_rew_regmap[] = { static const u32 vsc9959_sys_regmap[] = { REG(SYS_COUNT_RX_OCTETS, 0x000000), + REG(SYS_COUNT_RX_UNICAST, 0x000004), REG(SYS_COUNT_RX_MULTICAST, 0x000008), + REG(SYS_COUNT_RX_BROADCAST, 0x00000c), REG(SYS_COUNT_RX_SHORTS, 0x000010), REG(SYS_COUNT_RX_FRAGMENTS, 0x000014), REG(SYS_COUNT_RX_JABBERS, 0x000018), + REG(SYS_COUNT_RX_CRC_ALIGN_ERRS, 0x00001c), + REG(SYS_COUNT_RX_SYM_ERRS, 0x000020), REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), - REG(SYS_COUNT_RX_LONGS, 0x000044), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), + REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000200), + REG(SYS_COUNT_TX_UNICAST, 0x000204), + REG(SYS_COUNT_TX_MULTICAST, 0x000208), + REG(SYS_COUNT_TX_BROADCAST, 0x00020c), REG(SYS_COUNT_TX_COLLISION, 0x000210), REG(SYS_COUNT_TX_DROPS, 0x000214), + REG(SYS_COUNT_TX_PAUSE, 0x000218), REG(SYS_COUNT_TX_64, 0x00021c), REG(SYS_COUNT_TX_65_127, 0x000220), - REG(SYS_COUNT_TX_128_511, 0x000224), - REG(SYS_COUNT_TX_512_1023, 0x000228), - REG(SYS_COUNT_TX_1024_1526, 0x00022c), - REG(SYS_COUNT_TX_1527_MAX, 0x000230), + REG(SYS_COUNT_TX_128_255, 0x000224), + REG(SYS_COUNT_TX_256_511, 0x000228), + REG(SYS_COUNT_TX_512_1023, 0x00022c), + REG(SYS_COUNT_TX_1024_1526, 0x000230), + REG(SYS_COUNT_TX_1527_MAX, 0x000234), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000238), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00023c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000240), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000244), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000248), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00024c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000250), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000254), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000258), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00025c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000260), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000264), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000268), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00026c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000270), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000274), REG(SYS_COUNT_TX_AGING, 0x000278), + REG(SYS_COUNT_DROP_LOCAL, 0x000400), + REG(SYS_COUNT_DROP_TAIL, 0x000404), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000408), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00040c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000410), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000414), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000418), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00041c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000420), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000424), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000428), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00042c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000430), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000434), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000438), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00043c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000440), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000444), REG(SYS_RESET_CFG, 0x000e00), REG(SYS_SR_ETYPE_CFG, 0x000e04), REG(SYS_VLAN_ETYPE_CFG, 0x000e08), @@ -547,100 +618,379 @@ static const struct reg_field vsc9959_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 7, 4), }; -static const struct ocelot_stat_layout vsc9959_stats_layout[] = { - { .offset = 0x00, .name = "rx_octets", }, - { .offset = 0x01, .name = "rx_unicast", }, - { .offset = 0x02, .name = "rx_multicast", }, - { .offset = 0x03, .name = "rx_broadcast", }, - { .offset = 0x04, .name = "rx_shorts", }, - { .offset = 0x05, .name = "rx_fragments", }, - { .offset = 0x06, .name = "rx_jabbers", }, - { .offset = 0x07, .name = "rx_crc_align_errs", }, - { .offset = 0x08, .name = "rx_sym_errs", }, - { .offset = 0x09, .name = "rx_frames_below_65_octets", }, - { .offset = 0x0A, .name = "rx_frames_65_to_127_octets", }, - { .offset = 0x0B, .name = "rx_frames_128_to_255_octets", }, - { .offset = 0x0C, .name = "rx_frames_256_to_511_octets", }, - { .offset = 0x0D, .name = "rx_frames_512_to_1023_octets", }, - { .offset = 0x0E, .name = "rx_frames_1024_to_1526_octets", }, - { .offset = 0x0F, .name = "rx_frames_over_1526_octets", }, - { .offset = 0x10, .name = "rx_pause", }, - { .offset = 0x11, .name = "rx_control", }, - { .offset = 0x12, .name = "rx_longs", }, - { .offset = 0x13, .name = "rx_classified_drops", }, - { .offset = 0x14, .name = "rx_red_prio_0", }, - { .offset = 0x15, .name = "rx_red_prio_1", }, - { .offset = 0x16, .name = "rx_red_prio_2", }, - { .offset = 0x17, .name = "rx_red_prio_3", }, - { .offset = 0x18, .name = "rx_red_prio_4", }, - { .offset = 0x19, .name = "rx_red_prio_5", }, - { .offset = 0x1A, .name = "rx_red_prio_6", }, - { .offset = 0x1B, .name = "rx_red_prio_7", }, - { .offset = 0x1C, .name = "rx_yellow_prio_0", }, - { .offset = 0x1D, .name = "rx_yellow_prio_1", }, - { .offset = 0x1E, .name = "rx_yellow_prio_2", }, - { .offset = 0x1F, .name = "rx_yellow_prio_3", }, - { .offset = 0x20, .name = "rx_yellow_prio_4", }, - { .offset = 0x21, .name = "rx_yellow_prio_5", }, - { .offset = 0x22, .name = "rx_yellow_prio_6", }, - { .offset = 0x23, .name = "rx_yellow_prio_7", }, - { .offset = 0x24, .name = "rx_green_prio_0", }, - { .offset = 0x25, .name = "rx_green_prio_1", }, - { .offset = 0x26, .name = "rx_green_prio_2", }, - { .offset = 0x27, .name = "rx_green_prio_3", }, - { .offset = 0x28, .name = "rx_green_prio_4", }, - { .offset = 0x29, .name = "rx_green_prio_5", }, - { .offset = 0x2A, .name = "rx_green_prio_6", }, - { .offset = 0x2B, .name = "rx_green_prio_7", }, - { .offset = 0x80, .name = "tx_octets", }, - { .offset = 0x81, .name = "tx_unicast", }, - { .offset = 0x82, .name = "tx_multicast", }, - { .offset = 0x83, .name = "tx_broadcast", }, - { .offset = 0x84, .name = "tx_collision", }, - { .offset = 0x85, .name = "tx_drops", }, - { .offset = 0x86, .name = "tx_pause", }, - { .offset = 0x87, .name = "tx_frames_below_65_octets", }, - { .offset = 0x88, .name = "tx_frames_65_to_127_octets", }, - { .offset = 0x89, .name = "tx_frames_128_255_octets", }, - { .offset = 0x8B, .name = "tx_frames_256_511_octets", }, - { .offset = 0x8C, .name = "tx_frames_1024_1526_octets", }, - { .offset = 0x8D, .name = "tx_frames_over_1526_octets", }, - { .offset = 0x8E, .name = "tx_yellow_prio_0", }, - { .offset = 0x8F, .name = "tx_yellow_prio_1", }, - { .offset = 0x90, .name = "tx_yellow_prio_2", }, - { .offset = 0x91, .name = "tx_yellow_prio_3", }, - { .offset = 0x92, .name = "tx_yellow_prio_4", }, - { .offset = 0x93, .name = "tx_yellow_prio_5", }, - { .offset = 0x94, .name = "tx_yellow_prio_6", }, - { .offset = 0x95, .name = "tx_yellow_prio_7", }, - { .offset = 0x96, .name = "tx_green_prio_0", }, - { .offset = 0x97, .name = "tx_green_prio_1", }, - { .offset = 0x98, .name = "tx_green_prio_2", }, - { .offset = 0x99, .name = "tx_green_prio_3", }, - { .offset = 0x9A, .name = "tx_green_prio_4", }, - { .offset = 0x9B, .name = "tx_green_prio_5", }, - { .offset = 0x9C, .name = "tx_green_prio_6", }, - { .offset = 0x9D, .name = "tx_green_prio_7", }, - { .offset = 0x9E, .name = "tx_aged", }, - { .offset = 0x100, .name = "drop_local", }, - { .offset = 0x101, .name = "drop_tail", }, - { .offset = 0x102, .name = "drop_yellow_prio_0", }, - { .offset = 0x103, .name = "drop_yellow_prio_1", }, - { .offset = 0x104, .name = "drop_yellow_prio_2", }, - { .offset = 0x105, .name = "drop_yellow_prio_3", }, - { .offset = 0x106, .name = "drop_yellow_prio_4", }, - { .offset = 0x107, .name = "drop_yellow_prio_5", }, - { .offset = 0x108, .name = "drop_yellow_prio_6", }, - { .offset = 0x109, .name = "drop_yellow_prio_7", }, - { .offset = 0x10A, .name = "drop_green_prio_0", }, - { .offset = 0x10B, .name = "drop_green_prio_1", }, - { .offset = 0x10C, .name = "drop_green_prio_2", }, - { .offset = 0x10D, .name = "drop_green_prio_3", }, - { .offset = 0x10E, .name = "drop_green_prio_4", }, - { .offset = 0x10F, .name = "drop_green_prio_5", }, - { .offset = 0x110, .name = "drop_green_prio_6", }, - { .offset = 0x111, .name = "drop_green_prio_7", }, - OCELOT_STAT_END +static const struct ocelot_stat_layout vsc9959_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .reg = SYS_COUNT_RX_OCTETS, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .reg = SYS_COUNT_RX_UNICAST, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .reg = SYS_COUNT_RX_MULTICAST, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .reg = SYS_COUNT_RX_BROADCAST, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .reg = SYS_COUNT_RX_SHORTS, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .reg = SYS_COUNT_RX_FRAGMENTS, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .reg = SYS_COUNT_RX_JABBERS, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .reg = SYS_COUNT_RX_SYM_ERRS, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .reg = SYS_COUNT_RX_64, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .reg = SYS_COUNT_RX_65_127, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .reg = SYS_COUNT_RX_128_255, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .reg = SYS_COUNT_RX_256_511, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .reg = SYS_COUNT_RX_512_1023, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .reg = SYS_COUNT_RX_1024_1526, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .reg = SYS_COUNT_RX_1527_MAX, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .reg = SYS_COUNT_RX_PAUSE, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .reg = SYS_COUNT_RX_CONTROL, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .reg = SYS_COUNT_RX_LONGS, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .reg = SYS_COUNT_RX_RED_PRIO_0, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .reg = SYS_COUNT_RX_RED_PRIO_1, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .reg = SYS_COUNT_RX_RED_PRIO_2, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .reg = SYS_COUNT_RX_RED_PRIO_3, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .reg = SYS_COUNT_RX_RED_PRIO_4, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .reg = SYS_COUNT_RX_RED_PRIO_5, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .reg = SYS_COUNT_RX_RED_PRIO_6, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .reg = SYS_COUNT_RX_RED_PRIO_7, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .reg = SYS_COUNT_RX_GREEN_PRIO_0, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .reg = SYS_COUNT_RX_GREEN_PRIO_1, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .reg = SYS_COUNT_RX_GREEN_PRIO_2, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .reg = SYS_COUNT_RX_GREEN_PRIO_3, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .reg = SYS_COUNT_RX_GREEN_PRIO_4, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .reg = SYS_COUNT_RX_GREEN_PRIO_5, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .reg = SYS_COUNT_RX_GREEN_PRIO_6, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .reg = SYS_COUNT_RX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .reg = SYS_COUNT_TX_OCTETS, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .reg = SYS_COUNT_TX_UNICAST, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .reg = SYS_COUNT_TX_MULTICAST, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .reg = SYS_COUNT_TX_BROADCAST, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .reg = SYS_COUNT_TX_COLLISION, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .reg = SYS_COUNT_TX_DROPS, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .reg = SYS_COUNT_TX_PAUSE, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .reg = SYS_COUNT_TX_64, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .reg = SYS_COUNT_TX_65_127, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .reg = SYS_COUNT_TX_128_255, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .reg = SYS_COUNT_TX_256_511, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .reg = SYS_COUNT_TX_512_1023, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .reg = SYS_COUNT_TX_1024_1526, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .reg = SYS_COUNT_TX_1527_MAX, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .reg = SYS_COUNT_TX_GREEN_PRIO_0, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .reg = SYS_COUNT_TX_GREEN_PRIO_1, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .reg = SYS_COUNT_TX_GREEN_PRIO_2, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .reg = SYS_COUNT_TX_GREEN_PRIO_3, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .reg = SYS_COUNT_TX_GREEN_PRIO_4, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .reg = SYS_COUNT_TX_GREEN_PRIO_5, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .reg = SYS_COUNT_TX_GREEN_PRIO_6, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .reg = SYS_COUNT_TX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .reg = SYS_COUNT_TX_AGING, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .reg = SYS_COUNT_DROP_LOCAL, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .reg = SYS_COUNT_DROP_TAIL, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, + }, }; static const struct vcap_field vsc9959_vcap_es0_keys[] = { @@ -2166,7 +2516,7 @@ static void vsc9959_psfp_sgi_table_del(struct ocelot *ocelot, static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, struct felix_stream_filter_counters *counters) { - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); ocelot_rmw(ocelot, SYS_STAT_CFG_STAT_VIEW(index), SYS_STAT_CFG_STAT_VIEW_M, @@ -2183,7 +2533,7 @@ static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, SYS_STAT_CFG_STAT_CLEAR_SHOT(0x10), SYS_STAT_CFG); - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); } static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index ea0649211356..b34f4cdfe814 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -270,27 +270,98 @@ static const u32 vsc9953_rew_regmap[] = { static const u32 vsc9953_sys_regmap[] = { REG(SYS_COUNT_RX_OCTETS, 0x000000), + REG(SYS_COUNT_RX_UNICAST, 0x000004), REG(SYS_COUNT_RX_MULTICAST, 0x000008), + REG(SYS_COUNT_RX_BROADCAST, 0x00000c), REG(SYS_COUNT_RX_SHORTS, 0x000010), REG(SYS_COUNT_RX_FRAGMENTS, 0x000014), REG(SYS_COUNT_RX_JABBERS, 0x000018), + REG(SYS_COUNT_RX_CRC_ALIGN_ERRS, 0x00001c), + REG(SYS_COUNT_RX_SYM_ERRS, 0x000020), REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000100), + REG(SYS_COUNT_TX_UNICAST, 0x000104), + REG(SYS_COUNT_TX_MULTICAST, 0x000108), + REG(SYS_COUNT_TX_BROADCAST, 0x00010c), REG(SYS_COUNT_TX_COLLISION, 0x000110), REG(SYS_COUNT_TX_DROPS, 0x000114), + REG(SYS_COUNT_TX_PAUSE, 0x000118), REG(SYS_COUNT_TX_64, 0x00011c), REG(SYS_COUNT_TX_65_127, 0x000120), - REG(SYS_COUNT_TX_128_511, 0x000124), - REG(SYS_COUNT_TX_512_1023, 0x000128), - REG(SYS_COUNT_TX_1024_1526, 0x00012c), - REG(SYS_COUNT_TX_1527_MAX, 0x000130), + REG(SYS_COUNT_TX_128_255, 0x000124), + REG(SYS_COUNT_TX_256_511, 0x000128), + REG(SYS_COUNT_TX_512_1023, 0x00012c), + REG(SYS_COUNT_TX_1024_1526, 0x000130), + REG(SYS_COUNT_TX_1527_MAX, 0x000134), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000138), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00013c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000140), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000144), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000148), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00014c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000150), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000154), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000158), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00015c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000160), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000164), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000168), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00016c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000170), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000174), REG(SYS_COUNT_TX_AGING, 0x000178), + REG(SYS_COUNT_DROP_LOCAL, 0x000200), + REG(SYS_COUNT_DROP_TAIL, 0x000204), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000208), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00020c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000210), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000214), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000224), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000228), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00022c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000230), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000234), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000238), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00023c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000240), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000244), REG(SYS_RESET_CFG, 0x000318), REG_RESERVED(SYS_SR_ETYPE_CFG), REG(SYS_VLAN_ETYPE_CFG, 0x000320), @@ -543,101 +614,379 @@ static const struct reg_field vsc9953_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 11, 4), }; -static const struct ocelot_stat_layout vsc9953_stats_layout[] = { - { .offset = 0x00, .name = "rx_octets", }, - { .offset = 0x01, .name = "rx_unicast", }, - { .offset = 0x02, .name = "rx_multicast", }, - { .offset = 0x03, .name = "rx_broadcast", }, - { .offset = 0x04, .name = "rx_shorts", }, - { .offset = 0x05, .name = "rx_fragments", }, - { .offset = 0x06, .name = "rx_jabbers", }, - { .offset = 0x07, .name = "rx_crc_align_errs", }, - { .offset = 0x08, .name = "rx_sym_errs", }, - { .offset = 0x09, .name = "rx_frames_below_65_octets", }, - { .offset = 0x0A, .name = "rx_frames_65_to_127_octets", }, - { .offset = 0x0B, .name = "rx_frames_128_to_255_octets", }, - { .offset = 0x0C, .name = "rx_frames_256_to_511_octets", }, - { .offset = 0x0D, .name = "rx_frames_512_to_1023_octets", }, - { .offset = 0x0E, .name = "rx_frames_1024_to_1526_octets", }, - { .offset = 0x0F, .name = "rx_frames_over_1526_octets", }, - { .offset = 0x10, .name = "rx_pause", }, - { .offset = 0x11, .name = "rx_control", }, - { .offset = 0x12, .name = "rx_longs", }, - { .offset = 0x13, .name = "rx_classified_drops", }, - { .offset = 0x14, .name = "rx_red_prio_0", }, - { .offset = 0x15, .name = "rx_red_prio_1", }, - { .offset = 0x16, .name = "rx_red_prio_2", }, - { .offset = 0x17, .name = "rx_red_prio_3", }, - { .offset = 0x18, .name = "rx_red_prio_4", }, - { .offset = 0x19, .name = "rx_red_prio_5", }, - { .offset = 0x1A, .name = "rx_red_prio_6", }, - { .offset = 0x1B, .name = "rx_red_prio_7", }, - { .offset = 0x1C, .name = "rx_yellow_prio_0", }, - { .offset = 0x1D, .name = "rx_yellow_prio_1", }, - { .offset = 0x1E, .name = "rx_yellow_prio_2", }, - { .offset = 0x1F, .name = "rx_yellow_prio_3", }, - { .offset = 0x20, .name = "rx_yellow_prio_4", }, - { .offset = 0x21, .name = "rx_yellow_prio_5", }, - { .offset = 0x22, .name = "rx_yellow_prio_6", }, - { .offset = 0x23, .name = "rx_yellow_prio_7", }, - { .offset = 0x24, .name = "rx_green_prio_0", }, - { .offset = 0x25, .name = "rx_green_prio_1", }, - { .offset = 0x26, .name = "rx_green_prio_2", }, - { .offset = 0x27, .name = "rx_green_prio_3", }, - { .offset = 0x28, .name = "rx_green_prio_4", }, - { .offset = 0x29, .name = "rx_green_prio_5", }, - { .offset = 0x2A, .name = "rx_green_prio_6", }, - { .offset = 0x2B, .name = "rx_green_prio_7", }, - { .offset = 0x40, .name = "tx_octets", }, - { .offset = 0x41, .name = "tx_unicast", }, - { .offset = 0x42, .name = "tx_multicast", }, - { .offset = 0x43, .name = "tx_broadcast", }, - { .offset = 0x44, .name = "tx_collision", }, - { .offset = 0x45, .name = "tx_drops", }, - { .offset = 0x46, .name = "tx_pause", }, - { .offset = 0x47, .name = "tx_frames_below_65_octets", }, - { .offset = 0x48, .name = "tx_frames_65_to_127_octets", }, - { .offset = 0x49, .name = "tx_frames_128_255_octets", }, - { .offset = 0x4A, .name = "tx_frames_256_511_octets", }, - { .offset = 0x4B, .name = "tx_frames_512_1023_octets", }, - { .offset = 0x4C, .name = "tx_frames_1024_1526_octets", }, - { .offset = 0x4D, .name = "tx_frames_over_1526_octets", }, - { .offset = 0x4E, .name = "tx_yellow_prio_0", }, - { .offset = 0x4F, .name = "tx_yellow_prio_1", }, - { .offset = 0x50, .name = "tx_yellow_prio_2", }, - { .offset = 0x51, .name = "tx_yellow_prio_3", }, - { .offset = 0x52, .name = "tx_yellow_prio_4", }, - { .offset = 0x53, .name = "tx_yellow_prio_5", }, - { .offset = 0x54, .name = "tx_yellow_prio_6", }, - { .offset = 0x55, .name = "tx_yellow_prio_7", }, - { .offset = 0x56, .name = "tx_green_prio_0", }, - { .offset = 0x57, .name = "tx_green_prio_1", }, - { .offset = 0x58, .name = "tx_green_prio_2", }, - { .offset = 0x59, .name = "tx_green_prio_3", }, - { .offset = 0x5A, .name = "tx_green_prio_4", }, - { .offset = 0x5B, .name = "tx_green_prio_5", }, - { .offset = 0x5C, .name = "tx_green_prio_6", }, - { .offset = 0x5D, .name = "tx_green_prio_7", }, - { .offset = 0x5E, .name = "tx_aged", }, - { .offset = 0x80, .name = "drop_local", }, - { .offset = 0x81, .name = "drop_tail", }, - { .offset = 0x82, .name = "drop_yellow_prio_0", }, - { .offset = 0x83, .name = "drop_yellow_prio_1", }, - { .offset = 0x84, .name = "drop_yellow_prio_2", }, - { .offset = 0x85, .name = "drop_yellow_prio_3", }, - { .offset = 0x86, .name = "drop_yellow_prio_4", }, - { .offset = 0x87, .name = "drop_yellow_prio_5", }, - { .offset = 0x88, .name = "drop_yellow_prio_6", }, - { .offset = 0x89, .name = "drop_yellow_prio_7", }, - { .offset = 0x8A, .name = "drop_green_prio_0", }, - { .offset = 0x8B, .name = "drop_green_prio_1", }, - { .offset = 0x8C, .name = "drop_green_prio_2", }, - { .offset = 0x8D, .name = "drop_green_prio_3", }, - { .offset = 0x8E, .name = "drop_green_prio_4", }, - { .offset = 0x8F, .name = "drop_green_prio_5", }, - { .offset = 0x90, .name = "drop_green_prio_6", }, - { .offset = 0x91, .name = "drop_green_prio_7", }, - OCELOT_STAT_END +static const struct ocelot_stat_layout vsc9953_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .reg = SYS_COUNT_RX_OCTETS, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .reg = SYS_COUNT_RX_UNICAST, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .reg = SYS_COUNT_RX_MULTICAST, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .reg = SYS_COUNT_RX_BROADCAST, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .reg = SYS_COUNT_RX_SHORTS, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .reg = SYS_COUNT_RX_FRAGMENTS, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .reg = SYS_COUNT_RX_JABBERS, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .reg = SYS_COUNT_RX_SYM_ERRS, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .reg = SYS_COUNT_RX_64, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .reg = SYS_COUNT_RX_65_127, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .reg = SYS_COUNT_RX_128_255, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .reg = SYS_COUNT_RX_256_511, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .reg = SYS_COUNT_RX_512_1023, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .reg = SYS_COUNT_RX_1024_1526, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .reg = SYS_COUNT_RX_1527_MAX, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .reg = SYS_COUNT_RX_PAUSE, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .reg = SYS_COUNT_RX_CONTROL, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .reg = SYS_COUNT_RX_LONGS, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .reg = SYS_COUNT_RX_RED_PRIO_0, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .reg = SYS_COUNT_RX_RED_PRIO_1, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .reg = SYS_COUNT_RX_RED_PRIO_2, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .reg = SYS_COUNT_RX_RED_PRIO_3, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .reg = SYS_COUNT_RX_RED_PRIO_4, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .reg = SYS_COUNT_RX_RED_PRIO_5, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .reg = SYS_COUNT_RX_RED_PRIO_6, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .reg = SYS_COUNT_RX_RED_PRIO_7, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .reg = SYS_COUNT_RX_GREEN_PRIO_0, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .reg = SYS_COUNT_RX_GREEN_PRIO_1, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .reg = SYS_COUNT_RX_GREEN_PRIO_2, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .reg = SYS_COUNT_RX_GREEN_PRIO_3, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .reg = SYS_COUNT_RX_GREEN_PRIO_4, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .reg = SYS_COUNT_RX_GREEN_PRIO_5, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .reg = SYS_COUNT_RX_GREEN_PRIO_6, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .reg = SYS_COUNT_RX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .reg = SYS_COUNT_TX_OCTETS, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .reg = SYS_COUNT_TX_UNICAST, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .reg = SYS_COUNT_TX_MULTICAST, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .reg = SYS_COUNT_TX_BROADCAST, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .reg = SYS_COUNT_TX_COLLISION, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .reg = SYS_COUNT_TX_DROPS, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .reg = SYS_COUNT_TX_PAUSE, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .reg = SYS_COUNT_TX_64, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .reg = SYS_COUNT_TX_65_127, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .reg = SYS_COUNT_TX_128_255, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .reg = SYS_COUNT_TX_256_511, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .reg = SYS_COUNT_TX_512_1023, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .reg = SYS_COUNT_TX_1024_1526, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .reg = SYS_COUNT_TX_1527_MAX, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .reg = SYS_COUNT_TX_GREEN_PRIO_0, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .reg = SYS_COUNT_TX_GREEN_PRIO_1, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .reg = SYS_COUNT_TX_GREEN_PRIO_2, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .reg = SYS_COUNT_TX_GREEN_PRIO_3, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .reg = SYS_COUNT_TX_GREEN_PRIO_4, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .reg = SYS_COUNT_TX_GREEN_PRIO_5, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .reg = SYS_COUNT_TX_GREEN_PRIO_6, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .reg = SYS_COUNT_TX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .reg = SYS_COUNT_TX_AGING, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .reg = SYS_COUNT_DROP_LOCAL, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .reg = SYS_COUNT_DROP_TAIL, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, + }, }; static const struct vcap_field vsc9953_vcap_es0_keys[] = { diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 0569ff066634..10c6fea1227f 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -93,7 +93,7 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) region = dsa_devlink_region_create(ds, ops, 1, size); if (IS_ERR(region)) { - while (i-- >= 0) + while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); return PTR_ERR(region); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 7071604f9984..02808513ffe4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -13844,7 +13844,7 @@ static void bnx2x_check_kr2_wa(struct link_params *params, /* Once KR2 was disabled, wait 5 seconds before checking KR2 recovery * Since some switches tend to reinit the AN process and clear the - * the advertised BP/NP after ~2 seconds causing the KR2 to be disabled + * advertised BP/NP after ~2 seconds causing the KR2 to be disabled * and recovered many times */ if (vars->check_kr2_recovery_cnt > 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 84604aff53ce..89256b866840 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -243,7 +243,7 @@ static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req, /* * on rx, the iscsi pdu has to be < rx page size and the - * the max rx data length programmed in TP + * max rx data length programmed in TP */ val = min(adapter->params.tp.rx_pg_size, ((t3_read_reg(adapter, A_TP_PARA_REG2)) >> diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 26433a62d7f0..fed5f93bf620 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -497,7 +497,7 @@ struct cpl_t5_pass_accept_rpl { __be32 opt2; __be64 opt0; __be32 iss; - __be32 rsvd[3]; + __be32 rsvd; }; struct cpl_act_open_req { diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 7d49c28215f3..3dc3c0b626c2 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -135,11 +135,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds * to current timer would be next second. */ - tempval = readl(fep->hwp + FEC_ATIME_CTRL); - tempval |= FEC_T_CTRL_CAPTURE; - writel(tempval, fep->hwp + FEC_ATIME_CTRL); - - tempval = readl(fep->hwp + FEC_ATIME); + tempval = fep->cc.read(&fep->cc); /* Convert the ptp local counter to 1588 timestamp */ ns = timecounter_cyc2time(&fep->tc, tempval); ts = ns_to_timespec64(ns); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b36bf9c3e1e4..9f1d5de7bf16 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -384,7 +384,9 @@ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); break; default: - netdev_err(netdev, "tx_timeout recovery unsuccessful\n"); + netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n"); + set_bit(__I40E_DOWN_REQUESTED, pf->state); + set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state); break; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f6ba97a0166e..d4226161a3ef 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3203,11 +3203,13 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, protocol = vlan_get_protocol(skb); - if (eth_p_mpls(protocol)) + if (eth_p_mpls(protocol)) { ip.hdr = skb_inner_network_header(skb); - else + l4.hdr = skb_checksum_start(skb); + } else { ip.hdr = skb_network_header(skb); - l4.hdr = skb_checksum_start(skb); + l4.hdr = skb_transport_header(skb); + } /* set the tx_flags to indicate the IP protocol type. this is * required so that checksum header computation below is accurate. diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c index cd4e6a22d0f9..9ffbd24d83cb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_adminq.c +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c @@ -324,6 +324,7 @@ static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw) static enum iavf_status iavf_init_asq(struct iavf_hw *hw) { enum iavf_status ret_code = 0; + int i; if (hw->aq.asq.count > 0) { /* queue already initialized */ @@ -354,12 +355,17 @@ static enum iavf_status iavf_init_asq(struct iavf_hw *hw) /* initialize base registers */ ret_code = iavf_config_asq_regs(hw); if (ret_code) - goto init_adminq_free_rings; + goto init_free_asq_bufs; /* success! */ hw->aq.asq.count = hw->aq.num_asq_entries; goto init_adminq_exit; +init_free_asq_bufs: + for (i = 0; i < hw->aq.num_asq_entries; i++) + iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]); + iavf_free_virt_mem(hw, &hw->aq.asq.dma_head); + init_adminq_free_rings: iavf_free_adminq_asq(hw); @@ -383,6 +389,7 @@ init_adminq_exit: static enum iavf_status iavf_init_arq(struct iavf_hw *hw) { enum iavf_status ret_code = 0; + int i; if (hw->aq.arq.count > 0) { /* queue already initialized */ @@ -413,12 +420,16 @@ static enum iavf_status iavf_init_arq(struct iavf_hw *hw) /* initialize base registers */ ret_code = iavf_config_arq_regs(hw); if (ret_code) - goto init_adminq_free_rings; + goto init_free_arq_bufs; /* success! */ hw->aq.arq.count = hw->aq.num_arq_entries; goto init_adminq_exit; +init_free_arq_bufs: + for (i = 0; i < hw->aq.num_arq_entries; i++) + iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]); + iavf_free_virt_mem(hw, &hw->aq.arq.dma_head); init_adminq_free_rings: iavf_free_adminq_arq(hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 45d097a164ad..f39440ad5c50 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2367,7 +2367,7 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) err = iavf_get_vf_config(adapter); if (err == -EALREADY) { err = iavf_send_vf_config_msg(adapter); - goto err_alloc; + goto err; } else if (err == -EINVAL) { /* We only get -EINVAL if the device is in a very bad * state or if we've been disabled for previous bad @@ -3086,12 +3086,15 @@ continue_reset: return; reset_err: + if (running) { + set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); + iavf_free_traffic_irqs(adapter); + } + iavf_disable_vf(adapter); + mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) - iavf_change_state(adapter, __IAVF_RUNNING); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); - iavf_close(netdev); } /** @@ -4085,8 +4088,17 @@ static int iavf_open(struct net_device *netdev) return -EIO; } - while (!mutex_trylock(&adapter->crit_lock)) + while (!mutex_trylock(&adapter->crit_lock)) { + /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock + * is already taken and iavf_open is called from an upper + * device's notifier reacting on NETDEV_REGISTER event. + * We have to leave here to avoid dead lock. + */ + if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) + return -EBUSY; + usleep_range(500, 1000); + } if (adapter->state != __IAVF_DOWN) { err = -EBUSY; diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index 85a94483c2ed..40e678cfb507 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -62,7 +62,7 @@ ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi, int result; result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false); - if (result) + if (result && result != -EEXIST) dev_err(ice_pf_to_dev(pf), "Error setting promisc mode on VSI %i (rc=%d)\n", vsi->vsi_num, result); @@ -86,7 +86,7 @@ ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi, int result; result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true); - if (result) + if (result && result != -EEXIST) dev_err(ice_pf_to_dev(pf), "Error clearing promisc mode on VSI %i (rc=%d)\n", vsi->vsi_num, result); @@ -109,7 +109,7 @@ ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, int result; result = ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid); - if (result) + if (result && result != -EEXIST) dev_err(ice_pf_to_dev(pf), "Error clearing promisc mode on VSI %i for VID %u (rc=%d)\n", ice_get_hw_vsi_num(hw, vsi_handle), vid, result); @@ -132,7 +132,7 @@ ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, int result; result = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid); - if (result) + if (result && result != -EEXIST) dev_err(ice_pf_to_dev(pf), "Error setting promisc mode on VSI %i for VID %u (rc=%d)\n", ice_get_hw_vsi_num(hw, vsi_handle), vid, result); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a830f7f9aed0..733c455f6574 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3181,7 +3181,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) pf = vsi->back; vtype = vsi->type; - if (WARN_ON(vtype == ICE_VSI_VF) && !vsi->vf) + if (WARN_ON(vtype == ICE_VSI_VF && !vsi->vf)) return -EINVAL; ice_vsi_init_vlan_ops(vsi); @@ -4062,7 +4062,11 @@ int ice_vsi_del_vlan_zero(struct ice_vsi *vsi) if (err && err != -EEXIST) return err; - return 0; + /* when deleting the last VLAN filter, make sure to disable the VLAN + * promisc mode so the filter isn't left by accident + */ + return ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, 0); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb40526ee179..4ecaf40cf946 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -267,8 +267,10 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); } + if (status && status != -EEXIST) + return status; - return status; + return 0; } /** @@ -3573,6 +3575,14 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); + ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, vid); + if (ret) { + netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_ALLMULTI; + } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Make sure VLAN delete is successful before updating VLAN diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 262e553e3b58..3808034f7e7e 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4445,6 +4445,13 @@ ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, goto free_fltr_list; list_for_each_entry(list_itr, &vsi_list_head, list_entry) { + /* Avoid enabling or disabling VLAN zero twice when in double + * VLAN mode + */ + if (ice_is_dvm_ena(hw) && + list_itr->fltr_info.l_data.vlan.tpid == 0) + continue; + vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id; if (rm_vlan_promisc) status = ice_clear_vsi_promisc(hw, vsi_handle, @@ -4452,7 +4459,7 @@ ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, else status = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vlan_id); - if (status) + if (status && status != -EEXIST) break; } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 8fd7c3e37f5e..0abeed092de1 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -571,8 +571,10 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); - if (WARN_ON(!vsi)) + if (!vsi) { + dev_dbg(dev, "VF is already removed\n"); return -EINVAL; + } ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); ice_vsi_stop_all_rx_rings(vsi); dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", @@ -762,13 +764,16 @@ static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable) static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi) { struct ice_vsi_vlan_ops *vlan_ops; - int err; + int err = 0; vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - err = vlan_ops->ena_tx_filtering(vsi); - if (err) - return err; + /* Allow VF with VLAN 0 only to send all tagged traffic */ + if (vsi->type != ICE_VSI_VF || ice_vsi_has_non_zero_vlans(vsi)) { + err = vlan_ops->ena_tx_filtering(vsi); + if (err) + return err; + } return ice_cfg_mac_antispoof(vsi, true); } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 094e3c97a1ea..2b4c791b6cba 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -2288,6 +2288,15 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) /* Enable VLAN filtering on first non-zero VLAN */ if (!vlan_promisc && vid && !ice_is_dvm_ena(&pf->hw)) { + if (vf->spoofchk) { + status = vsi->inner_vlan_ops.ena_tx_filtering(vsi); + if (status) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "Enable VLAN anti-spoofing on VLAN ID: %d failed error-%d\n", + vid, status); + goto error_param; + } + } if (vsi->inner_vlan_ops.ena_rx_filtering(vsi)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", @@ -2333,8 +2342,10 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) } /* Disable VLAN filtering when only VLAN 0 is left */ - if (!ice_vsi_has_non_zero_vlans(vsi)) + if (!ice_vsi_has_non_zero_vlans(vsi)) { + vsi->inner_vlan_ops.dis_tx_filtering(vsi); vsi->inner_vlan_ops.dis_rx_filtering(vsi); + } if (vlan_promisc) ice_vf_dis_vlan_promisc(vsi, &vlan); @@ -2838,6 +2849,13 @@ ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi, if (vlan_promisc) ice_vf_dis_vlan_promisc(vsi, &vlan); + + /* Disable VLAN filtering when only VLAN 0 is left */ + if (!ice_vsi_has_non_zero_vlans(vsi) && ice_is_dvm_ena(&vsi->back->hw)) { + err = vsi->outer_vlan_ops.dis_tx_filtering(vsi); + if (err) + return err; + } } vc_vlan = &vlan_fltr->inner; @@ -2853,8 +2871,17 @@ ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi, /* no support for VLAN promiscuous on inner VLAN unless * we are in Single VLAN Mode (SVM) */ - if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc) - ice_vf_dis_vlan_promisc(vsi, &vlan); + if (!ice_is_dvm_ena(&vsi->back->hw)) { + if (vlan_promisc) + ice_vf_dis_vlan_promisc(vsi, &vlan); + + /* Disable VLAN filtering when only VLAN 0 is left */ + if (!ice_vsi_has_non_zero_vlans(vsi)) { + err = vsi->inner_vlan_ops.dis_tx_filtering(vsi); + if (err) + return err; + } + } } } @@ -2931,6 +2958,13 @@ ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi, if (err) return err; } + + /* Enable VLAN filtering on first non-zero VLAN */ + if (vf->spoofchk && vlan.vid && ice_is_dvm_ena(&vsi->back->hw)) { + err = vsi->outer_vlan_ops.ena_tx_filtering(vsi); + if (err) + return err; + } } vc_vlan = &vlan_fltr->inner; @@ -2946,10 +2980,19 @@ ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi, /* no support for VLAN promiscuous on inner VLAN unless * we are in Single VLAN Mode (SVM) */ - if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc) { - err = ice_vf_ena_vlan_promisc(vsi, &vlan); - if (err) - return err; + if (!ice_is_dvm_ena(&vsi->back->hw)) { + if (vlan_promisc) { + err = ice_vf_ena_vlan_promisc(vsi, &vlan); + if (err) + return err; + } + + /* Enable VLAN filtering on first non-zero VLAN */ + if (vf->spoofchk && vlan.vid) { + err = vsi->inner_vlan_ops.ena_tx_filtering(vsi); + if (err) + return err; + } } } } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 2d3daf022651..015b78144114 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -664,6 +664,8 @@ struct igb_adapter { struct igb_mac_addr *mac_table; struct vf_mac_filter vf_macs; struct vf_mac_filter *vf_mac_list; + /* lock for VF resources */ + spinlock_t vfs_lock; }; /* flags controlling PTP/1588 function */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d8b836a85cc3..2796e81d2726 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3637,6 +3637,7 @@ static int igb_disable_sriov(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + unsigned long flags; /* reclaim resources allocated to VFs */ if (adapter->vf_data) { @@ -3649,12 +3650,13 @@ static int igb_disable_sriov(struct pci_dev *pdev) pci_disable_sriov(pdev); msleep(500); } - + spin_lock_irqsave(&adapter->vfs_lock, flags); kfree(adapter->vf_mac_list); adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; adapter->vfs_allocated_count = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); wrfl(); msleep(100); @@ -3814,7 +3816,9 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV + rtnl_lock(); igb_disable_sriov(pdev); + rtnl_unlock(); #endif unregister_netdev(netdev); @@ -3974,6 +3978,9 @@ static int igb_sw_init(struct igb_adapter *adapter) spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); + + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -7958,8 +7965,10 @@ unlock: static void igb_msg_task(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { /* process any reset requests */ if (!igb_check_for_rst(hw, vf)) @@ -7973,6 +7982,7 @@ static void igb_msg_task(struct igb_adapter *adapter) if (!igb_check_for_ack(hw, vf)) igb_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } /** diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d9426b01f462..8aff4c0c28bd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1732,7 +1732,7 @@ static u32 mtk_xdp_run(struct mtk_eth *eth, struct mtk_rx_ring *ring, case XDP_TX: { struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); - if (mtk_xdp_submit_frame(eth, xdpf, dev, false)) { + if (!xdpf || mtk_xdp_submit_frame(eth, xdpf, dev, false)) { count = &hw_stats->xdp_stats.rx_xdp_tx_errors; act = XDP_DROP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4c1599de652c..0c66774a1720 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -696,6 +696,13 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + mlx5e_build_rep_params(netdev); mlx5e_timestamp_init(priv); @@ -708,12 +715,21 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv = netdev_priv(netdev); int err; + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err); mlx5e_vxlan_set_netdev_info(priv); - return mlx5e_init_rep(mdev, netdev); + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + return 0; } static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) @@ -836,13 +852,6 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; int err; - priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); - if (!priv->fs) { - netdev_err(priv->netdev, "FS allocation failed\n"); - return -ENOMEM; - } - priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1e240cdd9cbd..30c7b0e15721 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1897,9 +1897,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u16 local_port) cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); - mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_sp_port_vlan_classification_set(mlxsw_sp_port, true, true); mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 2e0b704b8a31..7b01b9c20722 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -46,6 +46,7 @@ struct mlxsw_sp2_ptp_state { * enabled. */ struct hwtstamp_config config; + struct mutex lock; /* Protects 'config' and HW configuration. */ }; struct mlxsw_sp1_ptp_key { @@ -1374,6 +1375,7 @@ struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) goto err_ptp_traps_set; refcount_set(&ptp_state->ptp_port_enabled_ref, 0); + mutex_init(&ptp_state->lock); return &ptp_state->common; err_ptp_traps_set: @@ -1388,6 +1390,7 @@ void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); + mutex_destroy(&ptp_state->lock); mlxsw_sp_ptp_traps_unset(mlxsw_sp); kfree(ptp_state); } @@ -1461,7 +1464,10 @@ int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + mutex_lock(&ptp_state->lock); *config = ptp_state->config; + mutex_unlock(&ptp_state->lock); + return 0; } @@ -1523,6 +1529,9 @@ mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, return -EINVAL; } + if ((ing_types && !egr_types) || (!ing_types && egr_types)) + return -EINVAL; + *p_ing_types = ing_types; *p_egr_types = egr_types; return 0; @@ -1574,8 +1583,6 @@ static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp2_ptp_state *ptp_state; int err; - ASSERT_RTNL(); - ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); if (refcount_inc_not_zero(&ptp_state->ptp_port_enabled_ref)) @@ -1597,8 +1604,6 @@ static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp2_ptp_state *ptp_state; int err; - ASSERT_RTNL(); - ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); if (!refcount_dec_and_test(&ptp_state->ptp_port_enabled_ref)) @@ -1618,16 +1623,20 @@ err_ptp_disable: int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, struct hwtstamp_config *config) { + struct mlxsw_sp2_ptp_state *ptp_state; enum hwtstamp_rx_filters rx_filter; struct hwtstamp_config new_config; u16 new_ing_types, new_egr_types; bool ptp_enabled; int err; + ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp_port->mlxsw_sp); + mutex_lock(&ptp_state->lock); + err = mlxsw_sp2_ptp_get_message_types(config, &new_ing_types, &new_egr_types, &rx_filter); if (err) - return err; + goto err_get_message_types; new_config.flags = config->flags; new_config.tx_type = config->tx_type; @@ -1640,11 +1649,11 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp2_ptp_configure_port(mlxsw_sp_port, new_ing_types, new_egr_types, new_config); if (err) - return err; + goto err_configure_port; } else if (!new_ing_types && !new_egr_types && ptp_enabled) { err = mlxsw_sp2_ptp_deconfigure_port(mlxsw_sp_port, new_config); if (err) - return err; + goto err_deconfigure_port; } mlxsw_sp_port->ptp.ing_types = new_ing_types; @@ -1652,8 +1661,15 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, /* Notify the ioctl caller what we are actually timestamping. */ config->rx_filter = rx_filter; + mutex_unlock(&ptp_state->lock); return 0; + +err_deconfigure_port: +err_configure_port: +err_get_message_types: + mutex_unlock(&ptp_state->lock); + return err; } int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 2d1628fdefc1..a8b88230959a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -171,10 +171,11 @@ static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, { } -int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) +static inline int +mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) { return -EOPNOTSUPP; } @@ -231,10 +232,11 @@ static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_ptp_get_ts_info_noptp(info); } -int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) +static inline int +mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, + struct mlxsw_sp_port *mlxsw_sp_port, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1d6e3b641b2e..d928b75f3780 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -710,7 +710,7 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) disable_irq(lan966x->xtr_irq); lan966x->xtr_irq = -ENXIO; - if (lan966x->ana_irq) { + if (lan966x->ana_irq > 0) { disable_irq(lan966x->ana_irq); lan966x->ana_irq = -ENXIO; } @@ -718,10 +718,10 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) if (lan966x->fdma) devm_free_irq(lan966x->dev, lan966x->fdma_irq, lan966x); - if (lan966x->ptp_irq) + if (lan966x->ptp_irq > 0) devm_free_irq(lan966x->dev, lan966x->ptp_irq, lan966x); - if (lan966x->ptp_ext_irq) + if (lan966x->ptp_ext_irq > 0) devm_free_irq(lan966x->dev, lan966x->ptp_ext_irq, lan966x); } @@ -1049,7 +1049,7 @@ static int lan966x_probe(struct platform_device *pdev) } lan966x->ana_irq = platform_get_irq_byname(pdev, "ana"); - if (lan966x->ana_irq) { + if (lan966x->ana_irq > 0) { err = devm_request_threaded_irq(&pdev->dev, lan966x->ana_irq, NULL, lan966x_ana_irq_handler, IRQF_ONESHOT, "ana irq", lan966x); diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index a3214a762e4b..19009a6bd33a 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -62,9 +62,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr) { struct sockaddr *address = addr; - if (!is_valid_ether_addr(address->sa_data)) - return -EADDRNOTAVAIL; - eth_hw_addr_set(ndev, address->sa_data); moxart_update_mac_address(ndev); @@ -77,7 +74,7 @@ static void moxart_mac_free_memory(struct net_device *ndev) int i; for (i = 0; i < RX_DESC_NUM; i++) - dma_unmap_single(&ndev->dev, priv->rx_mapping[i], + dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i], priv->rx_buf_size, DMA_FROM_DEVICE); if (priv->tx_desc_base) @@ -147,11 +144,11 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev) desc + RX_REG_OFFSET_DESC1); priv->rx_buf[i] = priv->rx_buf_base + priv->rx_buf_size * i; - priv->rx_mapping[i] = dma_map_single(&ndev->dev, + priv->rx_mapping[i] = dma_map_single(&priv->pdev->dev, priv->rx_buf[i], priv->rx_buf_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, priv->rx_mapping[i])) + if (dma_mapping_error(&priv->pdev->dev, priv->rx_mapping[i])) netdev_err(ndev, "DMA mapping error\n"); moxart_desc_write(priv->rx_mapping[i], @@ -172,9 +169,6 @@ static int moxart_mac_open(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); - if (!is_valid_ether_addr(ndev->dev_addr)) - return -EADDRNOTAVAIL; - napi_enable(&priv->napi); moxart_mac_reset(ndev); @@ -240,7 +234,7 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget) if (len > RX_BUF_SIZE) len = RX_BUF_SIZE; - dma_sync_single_for_cpu(&ndev->dev, + dma_sync_single_for_cpu(&priv->pdev->dev, priv->rx_mapping[rx_head], priv->rx_buf_size, DMA_FROM_DEVICE); skb = netdev_alloc_skb_ip_align(ndev, len); @@ -294,7 +288,7 @@ static void moxart_tx_finished(struct net_device *ndev) unsigned int tx_tail = priv->tx_tail; while (tx_tail != tx_head) { - dma_unmap_single(&ndev->dev, priv->tx_mapping[tx_tail], + dma_unmap_single(&priv->pdev->dev, priv->tx_mapping[tx_tail], priv->tx_len[tx_tail], DMA_TO_DEVICE); ndev->stats.tx_packets++; @@ -358,9 +352,9 @@ static netdev_tx_t moxart_mac_start_xmit(struct sk_buff *skb, len = skb->len > TX_BUF_SIZE ? TX_BUF_SIZE : skb->len; - priv->tx_mapping[tx_head] = dma_map_single(&ndev->dev, skb->data, + priv->tx_mapping[tx_head] = dma_map_single(&priv->pdev->dev, skb->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(&ndev->dev, priv->tx_mapping[tx_head])) { + if (dma_mapping_error(&priv->pdev->dev, priv->tx_mapping[tx_head])) { netdev_err(ndev, "DMA mapping error\n"); goto out_unlock; } @@ -379,7 +373,7 @@ static netdev_tx_t moxart_mac_start_xmit(struct sk_buff *skb, len = ETH_ZLEN; } - dma_sync_single_for_device(&ndev->dev, priv->tx_mapping[tx_head], + dma_sync_single_for_device(&priv->pdev->dev, priv->tx_mapping[tx_head], priv->tx_buf_size, DMA_TO_DEVICE); txdes1 = TX_DESC1_LTS | TX_DESC1_FTS | (len & TX_DESC1_BUF_SIZE_MASK); @@ -488,12 +482,19 @@ static int moxart_mac_probe(struct platform_device *pdev) } ndev->base_addr = res->start; + ret = platform_get_ethdev_address(p_dev, ndev); + if (ret == -EPROBE_DEFER) + goto init_fail; + if (ret) + eth_hw_addr_random(ndev); + moxart_update_mac_address(ndev); + spin_lock_init(&priv->txlock); priv->tx_buf_size = TX_BUF_SIZE; priv->rx_buf_size = RX_BUF_SIZE; - priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE * + priv->tx_desc_base = dma_alloc_coherent(p_dev, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); if (!priv->tx_desc_base) { @@ -501,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev) goto init_fail; } - priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE * + priv->rx_desc_base = dma_alloc_coherent(p_dev, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); if (!priv->rx_desc_base) { diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index d4649e4ee0e7..306026e6aa11 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1860,16 +1860,20 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) if (sset != ETH_SS_STATS) return; - for (i = 0; i < ocelot->num_stats; i++) + for (i = 0; i < OCELOT_NUM_STATS; i++) { + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name, ETH_GSTRING_LEN); + } } EXPORT_SYMBOL(ocelot_get_strings); /* Caller must hold &ocelot->stats_lock */ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) { - unsigned int idx = port * ocelot->num_stats; + unsigned int idx = port * OCELOT_NUM_STATS; struct ocelot_stats_region *region; int err, j; @@ -1877,9 +1881,8 @@ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG); list_for_each_entry(region, &ocelot->stats_regions, node) { - err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, - region->offset, region->buf, - region->count); + err = ocelot_bulk_read(ocelot, region->base, region->buf, + region->count); if (err) return err; @@ -1906,13 +1909,13 @@ static void ocelot_check_stats_work(struct work_struct *work) stats_work); int i, err; - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); for (i = 0; i < ocelot->num_phys_ports; i++) { err = ocelot_port_update_stats(ocelot, i); if (err) break; } - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); if (err) dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); @@ -1925,16 +1928,22 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { int i, err; - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); /* check and update now */ err = ocelot_port_update_stats(ocelot, port); - /* Copy all counters */ - for (i = 0; i < ocelot->num_stats; i++) - *data++ = ocelot->stats[port * ocelot->num_stats + i]; + /* Copy all supported counters */ + for (i = 0; i < OCELOT_NUM_STATS; i++) { + int index = port * OCELOT_NUM_STATS + i; + + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + + *data++ = ocelot->stats[index]; + } - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); if (err) dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); @@ -1943,10 +1952,16 @@ EXPORT_SYMBOL(ocelot_get_ethtool_stats); int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) { + int i, num_stats = 0; + if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return ocelot->num_stats; + for (i = 0; i < OCELOT_NUM_STATS; i++) + if (ocelot->stats_layout[i].name[0] != '\0') + num_stats++; + + return num_stats; } EXPORT_SYMBOL(ocelot_get_sset_count); @@ -1958,8 +1973,11 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->stats_regions); - for (i = 0; i < ocelot->num_stats; i++) { - if (region && ocelot->stats_layout[i].offset == last + 1) { + for (i = 0; i < OCELOT_NUM_STATS; i++) { + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + + if (region && ocelot->stats_layout[i].reg == last + 4) { region->count++; } else { region = devm_kzalloc(ocelot->dev, sizeof(*region), @@ -1967,12 +1985,12 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (!region) return -ENOMEM; - region->offset = ocelot->stats_layout[i].offset; + region->base = ocelot->stats_layout[i].reg; region->count = 1; list_add_tail(®ion->node, &ocelot->stats_regions); } - last = ocelot->stats_layout[i].offset; + last = ocelot->stats_layout[i].reg; } list_for_each_entry(region, &ocelot->stats_regions, node) { @@ -3340,7 +3358,6 @@ static void ocelot_detect_features(struct ocelot *ocelot) int ocelot_init(struct ocelot *ocelot) { - const struct ocelot_stat_layout *stat; char queue_name[32]; int i, ret; u32 port; @@ -3353,17 +3370,13 @@ int ocelot_init(struct ocelot *ocelot) } } - ocelot->num_stats = 0; - for_each_stat(ocelot, stat) - ocelot->num_stats++; - ocelot->stats = devm_kcalloc(ocelot->dev, - ocelot->num_phys_ports * ocelot->num_stats, + ocelot->num_phys_ports * OCELOT_NUM_STATS, sizeof(u64), GFP_KERNEL); if (!ocelot->stats) return -ENOMEM; - mutex_init(&ocelot->stats_lock); + spin_lock_init(&ocelot->stats_lock); mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); @@ -3511,7 +3524,6 @@ void ocelot_deinit(struct ocelot *ocelot) cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); destroy_workqueue(ocelot->owq); - mutex_destroy(&ocelot->stats_lock); } EXPORT_SYMBOL(ocelot_deinit); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 5e6136e80282..330d30841cdc 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -725,37 +725,42 @@ static void ocelot_get_stats64(struct net_device *dev, struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot *ocelot = priv->port.ocelot; int port = priv->port.index; + u64 *s; - /* Configure the port to read the stats from */ - ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), - SYS_STAT_CFG); + spin_lock(&ocelot->stats_lock); + + s = &ocelot->stats[port * OCELOT_NUM_STATS]; /* Get Rx stats */ - stats->rx_bytes = ocelot_read(ocelot, SYS_COUNT_RX_OCTETS); - stats->rx_packets = ocelot_read(ocelot, SYS_COUNT_RX_SHORTS) + - ocelot_read(ocelot, SYS_COUNT_RX_FRAGMENTS) + - ocelot_read(ocelot, SYS_COUNT_RX_JABBERS) + - ocelot_read(ocelot, SYS_COUNT_RX_LONGS) + - ocelot_read(ocelot, SYS_COUNT_RX_64) + - ocelot_read(ocelot, SYS_COUNT_RX_65_127) + - ocelot_read(ocelot, SYS_COUNT_RX_128_255) + - ocelot_read(ocelot, SYS_COUNT_RX_256_1023) + - ocelot_read(ocelot, SYS_COUNT_RX_1024_1526) + - ocelot_read(ocelot, SYS_COUNT_RX_1527_MAX); - stats->multicast = ocelot_read(ocelot, SYS_COUNT_RX_MULTICAST); + stats->rx_bytes = s[OCELOT_STAT_RX_OCTETS]; + stats->rx_packets = s[OCELOT_STAT_RX_SHORTS] + + s[OCELOT_STAT_RX_FRAGMENTS] + + s[OCELOT_STAT_RX_JABBERS] + + s[OCELOT_STAT_RX_LONGS] + + s[OCELOT_STAT_RX_64] + + s[OCELOT_STAT_RX_65_127] + + s[OCELOT_STAT_RX_128_255] + + s[OCELOT_STAT_RX_256_511] + + s[OCELOT_STAT_RX_512_1023] + + s[OCELOT_STAT_RX_1024_1526] + + s[OCELOT_STAT_RX_1527_MAX]; + stats->multicast = s[OCELOT_STAT_RX_MULTICAST]; stats->rx_dropped = dev->stats.rx_dropped; /* Get Tx stats */ - stats->tx_bytes = ocelot_read(ocelot, SYS_COUNT_TX_OCTETS); - stats->tx_packets = ocelot_read(ocelot, SYS_COUNT_TX_64) + - ocelot_read(ocelot, SYS_COUNT_TX_65_127) + - ocelot_read(ocelot, SYS_COUNT_TX_128_511) + - ocelot_read(ocelot, SYS_COUNT_TX_512_1023) + - ocelot_read(ocelot, SYS_COUNT_TX_1024_1526) + - ocelot_read(ocelot, SYS_COUNT_TX_1527_MAX); - stats->tx_dropped = ocelot_read(ocelot, SYS_COUNT_TX_DROPS) + - ocelot_read(ocelot, SYS_COUNT_TX_AGING); - stats->collisions = ocelot_read(ocelot, SYS_COUNT_TX_COLLISION); + stats->tx_bytes = s[OCELOT_STAT_TX_OCTETS]; + stats->tx_packets = s[OCELOT_STAT_TX_64] + + s[OCELOT_STAT_TX_65_127] + + s[OCELOT_STAT_TX_128_255] + + s[OCELOT_STAT_TX_256_511] + + s[OCELOT_STAT_TX_512_1023] + + s[OCELOT_STAT_TX_1024_1526] + + s[OCELOT_STAT_TX_1527_MAX]; + stats->tx_dropped = s[OCELOT_STAT_TX_DROPS] + + s[OCELOT_STAT_TX_AGED]; + stats->collisions = s[OCELOT_STAT_TX_COLLISION]; + + spin_unlock(&ocelot->stats_lock); } static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 961f803aca19..9c488953f541 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -96,101 +96,379 @@ static const struct reg_field ocelot_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 12, 4), }; -static const struct ocelot_stat_layout ocelot_stats_layout[] = { - { .name = "rx_octets", .offset = 0x00, }, - { .name = "rx_unicast", .offset = 0x01, }, - { .name = "rx_multicast", .offset = 0x02, }, - { .name = "rx_broadcast", .offset = 0x03, }, - { .name = "rx_shorts", .offset = 0x04, }, - { .name = "rx_fragments", .offset = 0x05, }, - { .name = "rx_jabbers", .offset = 0x06, }, - { .name = "rx_crc_align_errs", .offset = 0x07, }, - { .name = "rx_sym_errs", .offset = 0x08, }, - { .name = "rx_frames_below_65_octets", .offset = 0x09, }, - { .name = "rx_frames_65_to_127_octets", .offset = 0x0A, }, - { .name = "rx_frames_128_to_255_octets", .offset = 0x0B, }, - { .name = "rx_frames_256_to_511_octets", .offset = 0x0C, }, - { .name = "rx_frames_512_to_1023_octets", .offset = 0x0D, }, - { .name = "rx_frames_1024_to_1526_octets", .offset = 0x0E, }, - { .name = "rx_frames_over_1526_octets", .offset = 0x0F, }, - { .name = "rx_pause", .offset = 0x10, }, - { .name = "rx_control", .offset = 0x11, }, - { .name = "rx_longs", .offset = 0x12, }, - { .name = "rx_classified_drops", .offset = 0x13, }, - { .name = "rx_red_prio_0", .offset = 0x14, }, - { .name = "rx_red_prio_1", .offset = 0x15, }, - { .name = "rx_red_prio_2", .offset = 0x16, }, - { .name = "rx_red_prio_3", .offset = 0x17, }, - { .name = "rx_red_prio_4", .offset = 0x18, }, - { .name = "rx_red_prio_5", .offset = 0x19, }, - { .name = "rx_red_prio_6", .offset = 0x1A, }, - { .name = "rx_red_prio_7", .offset = 0x1B, }, - { .name = "rx_yellow_prio_0", .offset = 0x1C, }, - { .name = "rx_yellow_prio_1", .offset = 0x1D, }, - { .name = "rx_yellow_prio_2", .offset = 0x1E, }, - { .name = "rx_yellow_prio_3", .offset = 0x1F, }, - { .name = "rx_yellow_prio_4", .offset = 0x20, }, - { .name = "rx_yellow_prio_5", .offset = 0x21, }, - { .name = "rx_yellow_prio_6", .offset = 0x22, }, - { .name = "rx_yellow_prio_7", .offset = 0x23, }, - { .name = "rx_green_prio_0", .offset = 0x24, }, - { .name = "rx_green_prio_1", .offset = 0x25, }, - { .name = "rx_green_prio_2", .offset = 0x26, }, - { .name = "rx_green_prio_3", .offset = 0x27, }, - { .name = "rx_green_prio_4", .offset = 0x28, }, - { .name = "rx_green_prio_5", .offset = 0x29, }, - { .name = "rx_green_prio_6", .offset = 0x2A, }, - { .name = "rx_green_prio_7", .offset = 0x2B, }, - { .name = "tx_octets", .offset = 0x40, }, - { .name = "tx_unicast", .offset = 0x41, }, - { .name = "tx_multicast", .offset = 0x42, }, - { .name = "tx_broadcast", .offset = 0x43, }, - { .name = "tx_collision", .offset = 0x44, }, - { .name = "tx_drops", .offset = 0x45, }, - { .name = "tx_pause", .offset = 0x46, }, - { .name = "tx_frames_below_65_octets", .offset = 0x47, }, - { .name = "tx_frames_65_to_127_octets", .offset = 0x48, }, - { .name = "tx_frames_128_255_octets", .offset = 0x49, }, - { .name = "tx_frames_256_511_octets", .offset = 0x4A, }, - { .name = "tx_frames_512_1023_octets", .offset = 0x4B, }, - { .name = "tx_frames_1024_1526_octets", .offset = 0x4C, }, - { .name = "tx_frames_over_1526_octets", .offset = 0x4D, }, - { .name = "tx_yellow_prio_0", .offset = 0x4E, }, - { .name = "tx_yellow_prio_1", .offset = 0x4F, }, - { .name = "tx_yellow_prio_2", .offset = 0x50, }, - { .name = "tx_yellow_prio_3", .offset = 0x51, }, - { .name = "tx_yellow_prio_4", .offset = 0x52, }, - { .name = "tx_yellow_prio_5", .offset = 0x53, }, - { .name = "tx_yellow_prio_6", .offset = 0x54, }, - { .name = "tx_yellow_prio_7", .offset = 0x55, }, - { .name = "tx_green_prio_0", .offset = 0x56, }, - { .name = "tx_green_prio_1", .offset = 0x57, }, - { .name = "tx_green_prio_2", .offset = 0x58, }, - { .name = "tx_green_prio_3", .offset = 0x59, }, - { .name = "tx_green_prio_4", .offset = 0x5A, }, - { .name = "tx_green_prio_5", .offset = 0x5B, }, - { .name = "tx_green_prio_6", .offset = 0x5C, }, - { .name = "tx_green_prio_7", .offset = 0x5D, }, - { .name = "tx_aged", .offset = 0x5E, }, - { .name = "drop_local", .offset = 0x80, }, - { .name = "drop_tail", .offset = 0x81, }, - { .name = "drop_yellow_prio_0", .offset = 0x82, }, - { .name = "drop_yellow_prio_1", .offset = 0x83, }, - { .name = "drop_yellow_prio_2", .offset = 0x84, }, - { .name = "drop_yellow_prio_3", .offset = 0x85, }, - { .name = "drop_yellow_prio_4", .offset = 0x86, }, - { .name = "drop_yellow_prio_5", .offset = 0x87, }, - { .name = "drop_yellow_prio_6", .offset = 0x88, }, - { .name = "drop_yellow_prio_7", .offset = 0x89, }, - { .name = "drop_green_prio_0", .offset = 0x8A, }, - { .name = "drop_green_prio_1", .offset = 0x8B, }, - { .name = "drop_green_prio_2", .offset = 0x8C, }, - { .name = "drop_green_prio_3", .offset = 0x8D, }, - { .name = "drop_green_prio_4", .offset = 0x8E, }, - { .name = "drop_green_prio_5", .offset = 0x8F, }, - { .name = "drop_green_prio_6", .offset = 0x90, }, - { .name = "drop_green_prio_7", .offset = 0x91, }, - OCELOT_STAT_END +static const struct ocelot_stat_layout ocelot_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .reg = SYS_COUNT_RX_OCTETS, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .reg = SYS_COUNT_RX_UNICAST, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .reg = SYS_COUNT_RX_MULTICAST, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .reg = SYS_COUNT_RX_BROADCAST, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .reg = SYS_COUNT_RX_SHORTS, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .reg = SYS_COUNT_RX_FRAGMENTS, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .reg = SYS_COUNT_RX_JABBERS, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .reg = SYS_COUNT_RX_SYM_ERRS, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .reg = SYS_COUNT_RX_64, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .reg = SYS_COUNT_RX_65_127, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .reg = SYS_COUNT_RX_128_255, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .reg = SYS_COUNT_RX_256_511, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .reg = SYS_COUNT_RX_512_1023, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .reg = SYS_COUNT_RX_1024_1526, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .reg = SYS_COUNT_RX_1527_MAX, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .reg = SYS_COUNT_RX_PAUSE, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .reg = SYS_COUNT_RX_CONTROL, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .reg = SYS_COUNT_RX_LONGS, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .reg = SYS_COUNT_RX_RED_PRIO_0, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .reg = SYS_COUNT_RX_RED_PRIO_1, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .reg = SYS_COUNT_RX_RED_PRIO_2, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .reg = SYS_COUNT_RX_RED_PRIO_3, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .reg = SYS_COUNT_RX_RED_PRIO_4, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .reg = SYS_COUNT_RX_RED_PRIO_5, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .reg = SYS_COUNT_RX_RED_PRIO_6, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .reg = SYS_COUNT_RX_RED_PRIO_7, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .reg = SYS_COUNT_RX_GREEN_PRIO_0, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .reg = SYS_COUNT_RX_GREEN_PRIO_1, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .reg = SYS_COUNT_RX_GREEN_PRIO_2, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .reg = SYS_COUNT_RX_GREEN_PRIO_3, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .reg = SYS_COUNT_RX_GREEN_PRIO_4, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .reg = SYS_COUNT_RX_GREEN_PRIO_5, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .reg = SYS_COUNT_RX_GREEN_PRIO_6, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .reg = SYS_COUNT_RX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .reg = SYS_COUNT_TX_OCTETS, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .reg = SYS_COUNT_TX_UNICAST, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .reg = SYS_COUNT_TX_MULTICAST, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .reg = SYS_COUNT_TX_BROADCAST, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .reg = SYS_COUNT_TX_COLLISION, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .reg = SYS_COUNT_TX_DROPS, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .reg = SYS_COUNT_TX_PAUSE, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .reg = SYS_COUNT_TX_64, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .reg = SYS_COUNT_TX_65_127, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .reg = SYS_COUNT_TX_128_255, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .reg = SYS_COUNT_TX_256_511, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .reg = SYS_COUNT_TX_512_1023, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .reg = SYS_COUNT_TX_1024_1526, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .reg = SYS_COUNT_TX_1527_MAX, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .reg = SYS_COUNT_TX_GREEN_PRIO_0, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .reg = SYS_COUNT_TX_GREEN_PRIO_1, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .reg = SYS_COUNT_TX_GREEN_PRIO_2, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .reg = SYS_COUNT_TX_GREEN_PRIO_3, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .reg = SYS_COUNT_TX_GREEN_PRIO_4, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .reg = SYS_COUNT_TX_GREEN_PRIO_5, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .reg = SYS_COUNT_TX_GREEN_PRIO_6, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .reg = SYS_COUNT_TX_GREEN_PRIO_7, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .reg = SYS_COUNT_TX_AGING, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .reg = SYS_COUNT_DROP_LOCAL, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .reg = SYS_COUNT_DROP_TAIL, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, + }, }; static void ocelot_pll5_init(struct ocelot *ocelot) diff --git a/drivers/net/ethernet/mscc/vsc7514_regs.c b/drivers/net/ethernet/mscc/vsc7514_regs.c index c2af4eb8ca5d..9cf82ecf191c 100644 --- a/drivers/net/ethernet/mscc/vsc7514_regs.c +++ b/drivers/net/ethernet/mscc/vsc7514_regs.c @@ -180,13 +180,38 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), - REG(SYS_COUNT_RX_PAUSE, 0x00003c), - REG(SYS_COUNT_RX_CONTROL, 0x000040), - REG(SYS_COUNT_RX_LONGS, 0x000044), - REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x000048), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), + REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000100), REG(SYS_COUNT_TX_UNICAST, 0x000104), REG(SYS_COUNT_TX_MULTICAST, 0x000108), @@ -196,11 +221,46 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_TX_PAUSE, 0x000118), REG(SYS_COUNT_TX_64, 0x00011c), REG(SYS_COUNT_TX_65_127, 0x000120), - REG(SYS_COUNT_TX_128_511, 0x000124), - REG(SYS_COUNT_TX_512_1023, 0x000128), - REG(SYS_COUNT_TX_1024_1526, 0x00012c), - REG(SYS_COUNT_TX_1527_MAX, 0x000130), - REG(SYS_COUNT_TX_AGING, 0x000170), + REG(SYS_COUNT_TX_128_255, 0x000124), + REG(SYS_COUNT_TX_256_511, 0x000128), + REG(SYS_COUNT_TX_512_1023, 0x00012c), + REG(SYS_COUNT_TX_1024_1526, 0x000130), + REG(SYS_COUNT_TX_1527_MAX, 0x000134), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000138), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00013c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000140), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000144), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000148), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00014c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000150), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000154), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000158), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00015c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000160), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000164), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000168), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00016c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000170), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000174), + REG(SYS_COUNT_TX_AGING, 0x000178), + REG(SYS_COUNT_DROP_LOCAL, 0x000200), + REG(SYS_COUNT_DROP_TAIL, 0x000204), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000208), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00020c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000210), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000214), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000214), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000218), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00021c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000220), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000224), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000228), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00022c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000230), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000234), REG(SYS_RESET_CFG, 0x000508), REG(SYS_CMID, 0x00050c), REG(SYS_VLAN_ETYPE_CFG, 0x000510), diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 52f9ed8db9c9..4f2b82a884b9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -1134,6 +1134,7 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); + clk_disable_unprepare(priv->plat->stmmac_clk); clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); diff --git a/drivers/net/fddi/skfp/h/hwmtm.h b/drivers/net/fddi/skfp/h/hwmtm.h index 76c4a709d73d..e97db826cdd4 100644 --- a/drivers/net/fddi/skfp/h/hwmtm.h +++ b/drivers/net/fddi/skfp/h/hwmtm.h @@ -348,7 +348,7 @@ do { \ * This macro is invoked by the OS-specific before it left the * function mac_drv_rx_complete. This macro calls mac_drv_fill_rxd * if the number of used RxDs is equal or lower than the - * the given low water mark. + * given low water mark. * * para low_water low water mark of used RxD's * diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index a5b355384d4a..6f35438cda89 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -48,7 +48,7 @@ struct ipa; * * The offset of registers related to resource types is computed by a macro * that is supplied a parameter "rt". The "rt" represents a resource type, - * which is is a member of the ipa_resource_type_src enumerated type for + * which is a member of the ipa_resource_type_src enumerated type for * source endpoint resources or the ipa_resource_type_dst enumerated type * for destination endpoint resources. * diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d934774e9733..9cce7dec7366 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1211,7 +1211,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, if (!hdr_hash || !skb) return; - switch ((int)hdr_hash->hash_report) { + switch (__le16_to_cpu(hdr_hash->hash_report)) { case VIRTIO_NET_HASH_REPORT_TCPv4: case VIRTIO_NET_HASH_REPORT_UDPv4: case VIRTIO_NET_HASH_REPORT_TCPv6: @@ -1229,7 +1229,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, default: rss_hash_type = PKT_HASH_TYPE_NONE; } - skb_set_hash(skb, (unsigned int)hdr_hash->hash_value, rss_hash_type); + skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); } static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, @@ -3432,29 +3432,6 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu (unsigned int)GOOD_PACKET_LEN); } -static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes) -{ - u32 i, rx_size, tx_size; - - if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) { - rx_size = 1024; - tx_size = 1024; - - } else if (vi->speed < SPEED_40000) { - rx_size = 1024 * 4; - tx_size = 1024 * 4; - - } else { - rx_size = 1024 * 8; - tx_size = 1024 * 8; - } - - for (i = 0; i < vi->max_queue_pairs; i++) { - sizes[rxq2vq(i)] = rx_size; - sizes[txq2vq(i)] = tx_size; - } -} - static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; @@ -3462,7 +3439,6 @@ static int virtnet_find_vqs(struct virtnet_info *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; - u32 *sizes; bool *ctx; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by @@ -3490,15 +3466,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ctx = NULL; } - sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL); - if (!sizes) - goto err_sizes; - /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { callbacks[total_vqs - 1] = NULL; names[total_vqs - 1] = "control"; - sizes[total_vqs - 1] = 64; } /* Allocate/initialize parameters for send/receive virtqueues */ @@ -3513,10 +3484,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ctx[rxq2vq(i)] = true; } - virtnet_config_sizes(vi, sizes); - - ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks, - names, sizes, ctx, NULL); + ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, + names, ctx, NULL); if (ret) goto err_find; @@ -3536,8 +3505,6 @@ static int virtnet_find_vqs(struct virtnet_info *vi) err_find: - kfree(sizes); -err_sizes: kfree(ctx); err_ctx: kfree(names); @@ -3897,9 +3864,6 @@ static int virtnet_probe(struct virtio_device *vdev) vi->curr_queue_pairs = num_online_cpus(); vi->max_queue_pairs = max_queue_pairs; - virtnet_init_settings(dev); - virtnet_update_settings(vi); - /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) @@ -3912,6 +3876,8 @@ static int virtnet_probe(struct virtio_device *vdev) netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); + virtnet_init_settings(dev); + if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { vi->failover = net_failover_create(vi->dev); if (IS_ERR(vi->failover)) { diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index 342778782359..2c20b0de8cb0 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -72,7 +72,7 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival) local64_set(&hwc->prev_count, initial_val); } -/** +/* * This is just a simple implementation to allow legacy implementations * compatible with new RISC-V PMU driver framework. * This driver only allows reading two counters i.e CYCLE & INSTRET. diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 8be13d416f48..1ae3c56b66b0 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -928,7 +928,6 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/platform/x86/serial-multi-instantiate.c b/drivers/platform/x86/serial-multi-instantiate.c index 67feed25c9db..5362f1a7b77c 100644 --- a/drivers/platform/x86/serial-multi-instantiate.c +++ b/drivers/platform/x86/serial-multi-instantiate.c @@ -328,6 +328,7 @@ static const struct acpi_device_id smi_acpi_ids[] = { { "INT3515", (unsigned long)&int3515_data }, /* Non-conforming _HID for Cirrus Logic already released */ { "CLSA0100", (unsigned long)&cs35l41_hda }, + { "CLSA0101", (unsigned long)&cs35l41_hda }, { } }; MODULE_DEVICE_TABLE(acpi, smi_acpi_ids); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7150b1d0159e..d8373cb04f90 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4784,10 +4784,10 @@ int regulator_bulk_get(struct device *dev, int num_consumers, consumers[i].consumer = regulator_get(dev, consumers[i].supply); if (IS_ERR(consumers[i].consumer)) { - consumers[i].consumer = NULL; ret = dev_err_probe(dev, PTR_ERR(consumers[i].consumer), "Failed to get supply '%s'", consumers[i].supply); + consumers[i].consumer = NULL; goto err; } diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 81c4f5776109..0f7706e23eb9 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -158,7 +158,6 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool * ctx, struct irq_affinity *desc) { diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 8f1d1cf23d44..59ac98f2bd27 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -2086,6 +2086,9 @@ static inline void ap_scan_adapter(int ap) */ static bool ap_get_configuration(void) { + if (!ap_qci_info) /* QCI not supported */ + return false; + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); ap_fetch_qci_info(ap_qci_info); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 0c40af157df2..0f17933954fb 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -148,12 +148,16 @@ struct ap_driver { /* * Called at the start of the ap bus scan function when * the crypto config information (qci) has changed. + * This callback is not invoked if there is no AP + * QCI support available. */ void (*on_config_changed)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); /* * Called at the end of the ap bus scan function when * the crypto config information (qci) has changed. + * This callback is not invoked if there is no AP + * QCI support available. */ void (*on_scan_complete)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 896896e32664..a10dbe632ef9 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -637,7 +637,6 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index 0bc7daa7afc8..e4cb52e1fe26 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -156,6 +156,7 @@ struct meson_spicc_device { void __iomem *base; struct clk *core; struct clk *pclk; + struct clk_divider pow2_div; struct clk *clk; struct spi_message *message; struct spi_transfer *xfer; @@ -168,6 +169,8 @@ struct meson_spicc_device { unsigned long xfer_remain; }; +#define pow2_clk_to_spicc(_div) container_of(_div, struct meson_spicc_device, pow2_div) + static void meson_spicc_oen_enable(struct meson_spicc_device *spicc) { u32 conf; @@ -421,7 +424,7 @@ static int meson_spicc_prepare_message(struct spi_master *master, { struct meson_spicc_device *spicc = spi_master_get_devdata(master); struct spi_device *spi = message->spi; - u32 conf = 0; + u32 conf = readl_relaxed(spicc->base + SPICC_CONREG) & SPICC_DATARATE_MASK; /* Store current message */ spicc->message = message; @@ -458,8 +461,6 @@ static int meson_spicc_prepare_message(struct spi_master *master, /* Select CS */ conf |= FIELD_PREP(SPICC_CS_MASK, spi->chip_select); - /* Default Clock rate core/4 */ - /* Default 8bit word */ conf |= FIELD_PREP(SPICC_BITLENGTH_MASK, 8 - 1); @@ -476,12 +477,16 @@ static int meson_spicc_prepare_message(struct spi_master *master, static int meson_spicc_unprepare_transfer(struct spi_master *master) { struct meson_spicc_device *spicc = spi_master_get_devdata(master); + u32 conf = readl_relaxed(spicc->base + SPICC_CONREG) & SPICC_DATARATE_MASK; /* Disable all IRQs */ writel(0, spicc->base + SPICC_INTREG); device_reset_optional(&spicc->pdev->dev); + /* Set default configuration, keeping datarate field */ + writel_relaxed(conf, spicc->base + SPICC_CONREG); + return 0; } @@ -518,14 +523,60 @@ static void meson_spicc_cleanup(struct spi_device *spi) * Clk path for G12A series: * pclk -> pow2 fixed div -> pow2 div -> mux -> out * pclk -> enh fixed div -> enh div -> mux -> out + * + * The pow2 divider is tied to the controller HW state, and the + * divider is only valid when the controller is initialized. + * + * A set of clock ops is added to make sure we don't read/set this + * clock rate while the controller is in an unknown state. */ -static int meson_spicc_clk_init(struct meson_spicc_device *spicc) +static unsigned long meson_spicc_pow2_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_divider *divider = to_clk_divider(hw); + struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); + + if (!spicc->master->cur_msg || !spicc->master->busy) + return 0; + + return clk_divider_ops.recalc_rate(hw, parent_rate); +} + +static int meson_spicc_pow2_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_divider *divider = to_clk_divider(hw); + struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); + + if (!spicc->master->cur_msg || !spicc->master->busy) + return -EINVAL; + + return clk_divider_ops.determine_rate(hw, req); +} + +static int meson_spicc_pow2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_divider *divider = to_clk_divider(hw); + struct meson_spicc_device *spicc = pow2_clk_to_spicc(divider); + + if (!spicc->master->cur_msg || !spicc->master->busy) + return -EINVAL; + + return clk_divider_ops.set_rate(hw, rate, parent_rate); +} + +const struct clk_ops meson_spicc_pow2_clk_ops = { + .recalc_rate = meson_spicc_pow2_recalc_rate, + .determine_rate = meson_spicc_pow2_determine_rate, + .set_rate = meson_spicc_pow2_set_rate, +}; + +static int meson_spicc_pow2_clk_init(struct meson_spicc_device *spicc) { struct device *dev = &spicc->pdev->dev; - struct clk_fixed_factor *pow2_fixed_div, *enh_fixed_div; - struct clk_divider *pow2_div, *enh_div; - struct clk_mux *mux; + struct clk_fixed_factor *pow2_fixed_div; struct clk_init_data init; struct clk *clk; struct clk_parent_data parent_data[2]; @@ -560,31 +611,45 @@ static int meson_spicc_clk_init(struct meson_spicc_device *spicc) if (WARN_ON(IS_ERR(clk))) return PTR_ERR(clk); - pow2_div = devm_kzalloc(dev, sizeof(*pow2_div), GFP_KERNEL); - if (!pow2_div) - return -ENOMEM; - snprintf(name, sizeof(name), "%s#pow2_div", dev_name(dev)); init.name = name; - init.ops = &clk_divider_ops; - init.flags = CLK_SET_RATE_PARENT; + init.ops = &meson_spicc_pow2_clk_ops; + /* + * Set NOCACHE here to make sure we read the actual HW value + * since we reset the HW after each transfer. + */ + init.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE; parent_data[0].hw = &pow2_fixed_div->hw; init.num_parents = 1; - pow2_div->shift = 16, - pow2_div->width = 3, - pow2_div->flags = CLK_DIVIDER_POWER_OF_TWO, - pow2_div->reg = spicc->base + SPICC_CONREG; - pow2_div->hw.init = &init; + spicc->pow2_div.shift = 16, + spicc->pow2_div.width = 3, + spicc->pow2_div.flags = CLK_DIVIDER_POWER_OF_TWO, + spicc->pow2_div.reg = spicc->base + SPICC_CONREG; + spicc->pow2_div.hw.init = &init; - clk = devm_clk_register(dev, &pow2_div->hw); - if (WARN_ON(IS_ERR(clk))) - return PTR_ERR(clk); + spicc->clk = devm_clk_register(dev, &spicc->pow2_div.hw); + if (WARN_ON(IS_ERR(spicc->clk))) + return PTR_ERR(spicc->clk); - if (!spicc->data->has_enhance_clk_div) { - spicc->clk = clk; - return 0; - } + return 0; +} + +static int meson_spicc_enh_clk_init(struct meson_spicc_device *spicc) +{ + struct device *dev = &spicc->pdev->dev; + struct clk_fixed_factor *enh_fixed_div; + struct clk_divider *enh_div; + struct clk_mux *mux; + struct clk_init_data init; + struct clk *clk; + struct clk_parent_data parent_data[2]; + char name[64]; + + memset(&init, 0, sizeof(init)); + memset(&parent_data, 0, sizeof(parent_data)); + + init.parent_data = parent_data; /* algorithm for enh div: rate = freq / 2 / (N + 1) */ @@ -637,7 +702,7 @@ static int meson_spicc_clk_init(struct meson_spicc_device *spicc) snprintf(name, sizeof(name), "%s#sel", dev_name(dev)); init.name = name; init.ops = &clk_mux_ops; - parent_data[0].hw = &pow2_div->hw; + parent_data[0].hw = &spicc->pow2_div.hw; parent_data[1].hw = &enh_div->hw; init.num_parents = 2; init.flags = CLK_SET_RATE_PARENT; @@ -754,12 +819,20 @@ static int meson_spicc_probe(struct platform_device *pdev) meson_spicc_oen_enable(spicc); - ret = meson_spicc_clk_init(spicc); + ret = meson_spicc_pow2_clk_init(spicc); if (ret) { - dev_err(&pdev->dev, "clock registration failed\n"); + dev_err(&pdev->dev, "pow2 clock registration failed\n"); goto out_clk; } + if (spicc->data->has_enhance_clk_div) { + ret = meson_spicc_enh_clk_init(spicc); + if (ret) { + dev_err(&pdev->dev, "clock registration failed\n"); + goto out_clk; + } + } + ret = devm_spi_register_master(&pdev->dev, master); if (ret) { dev_err(&pdev->dev, "spi master registration failed\n"); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8f97a3eacdea..83da8862b8f2 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -95,7 +95,7 @@ static ssize_t driver_override_show(struct device *dev, } static DEVICE_ATTR_RW(driver_override); -static struct spi_statistics *spi_alloc_pcpu_stats(struct device *dev) +static struct spi_statistics __percpu *spi_alloc_pcpu_stats(struct device *dev) { struct spi_statistics __percpu *pcpu_stats; @@ -162,7 +162,7 @@ static struct device_attribute dev_attr_spi_device_##field = { \ } #define SPI_STATISTICS_SHOW_NAME(name, file, field) \ -static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \ +static ssize_t spi_statistics_##name##_show(struct spi_statistics __percpu *stat, \ char *buf) \ { \ ssize_t len; \ @@ -309,7 +309,7 @@ static const struct attribute_group *spi_master_groups[] = { NULL, }; -static void spi_statistics_add_transfer_stats(struct spi_statistics *pcpu_stats, +static void spi_statistics_add_transfer_stats(struct spi_statistics __percpu *pcpu_stats, struct spi_transfer *xfer, struct spi_controller *ctlr) { @@ -1275,8 +1275,8 @@ static int spi_transfer_wait(struct spi_controller *ctlr, struct spi_message *msg, struct spi_transfer *xfer) { - struct spi_statistics *statm = ctlr->pcpu_statistics; - struct spi_statistics *stats = msg->spi->pcpu_statistics; + struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; + struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; u32 speed_hz = xfer->speed_hz; unsigned long long ms; @@ -1432,8 +1432,8 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, struct spi_transfer *xfer; bool keep_cs = false; int ret = 0; - struct spi_statistics *statm = ctlr->pcpu_statistics; - struct spi_statistics *stats = msg->spi->pcpu_statistics; + struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; + struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; spi_set_cs(msg->spi, true, false); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index f2b1bcefcadd..1175f3a46859 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -326,6 +326,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx, void *ret; int id; + if (!access_ok((void __user *)addr, length)) + return ERR_PTR(-EFAULT); + mutex_lock(&teedev->mutex); id = idr_alloc(&teedev->idr, NULL, 1, 0, GFP_KERNEL); mutex_unlock(&teedev->mutex); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c492a57531c6..3ff746e3f24a 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev) static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), - const char *name, u32 size, bool ctx) + const char *name, bool ctx) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_vq_info *info; @@ -395,11 +395,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in goto error_new_virtqueue; } - if (!size || size > num) - size = num; - /* Create the vring */ - vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev, + vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, true, true, ctx, vm_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -477,7 +474,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { @@ -503,7 +499,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i], - sizes ? sizes[i] : 0, ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { vm_del_vqs(vdev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 00ad476a815d..ad258a9d3b9f 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -174,7 +174,6 @@ error: static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, - u32 size, bool ctx, u16 msix_vec) { @@ -187,7 +186,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in if (!info) return ERR_PTR(-ENOMEM); - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx, + vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; @@ -284,7 +283,7 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], bool per_vq_vectors, + const char * const names[], bool per_vq_vectors, const bool *ctx, struct irq_affinity *desc) { @@ -327,8 +326,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, else msix_vec = VP_MSIX_VQ_VECTOR; vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], - sizes ? sizes[i] : 0, - ctx ? ctx[i] : false, msix_vec); + ctx ? ctx[i] : false, + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; @@ -358,7 +357,7 @@ error_find: static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx) + const char * const names[], const bool *ctx) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err, queue_idx = 0; @@ -380,7 +379,6 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, continue; } vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], - sizes ? sizes[i] : 0, ctx ? ctx[i] : false, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { @@ -398,21 +396,21 @@ out_del_vqs: /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); if (!err) return 0; /* Finally fall back to regular interrupts. */ - return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx); + return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); } const char *vp_bus_name(struct virtio_device *vdev) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index c0448378b698..23112d84218f 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -80,7 +80,6 @@ struct virtio_pci_device { unsigned int idx, void (*callback)(struct virtqueue *vq), const char *name, - u32 size, bool ctx, u16 msix_vec); void (*del_vq)(struct virtio_pci_vq_info *info); @@ -111,7 +110,7 @@ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index d75e5c4e637f..2257f1b3d8ae 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,7 +112,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, - u32 size, bool ctx, u16 msix_vec) { @@ -126,13 +125,10 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) return ERR_PTR(-ENOENT); - if (!size || size > num) - size = num; - info->msix_vector = msix_vec; /* create the vring */ - vq = vring_create_virtqueue(index, size, + vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, true, false, ctx, vp_notify, callback, name); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index f7965c5dd36b..c3b9f2761849 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, - u32 size, bool ctx, u16 msix_vec) { @@ -311,18 +310,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); - if (!size || size > num) - size = num; - - if (size & (size - 1)) { - dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size); + if (num & (num - 1)) { + dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); return ERR_PTR(-EINVAL); } info->msix_vector = msix_vec; /* create the vring */ - vq = vring_create_virtqueue(index, size, + vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, true, true, ctx, vp_notify, callback, name); @@ -351,15 +347,12 @@ err: static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx, - desc); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); if (rc) return rc; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d66c8e6d0ef3..4620e9d79dde 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2426,6 +2426,14 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } +/** + * vring_interrupt - notify a virtqueue on an interrupt + * @irq: the IRQ number (ignored) + * @_vq: the struct virtqueue to notify + * + * Calls the callback function of @_vq to process the virtqueue + * notification. + */ irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 9bc4d110b800..9670cc79371d 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) static struct virtqueue * virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), - const char *name, u32 size, bool ctx) + const char *name, bool ctx) { struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); struct vdpa_device *vdpa = vd_get_vdpa(vdev); @@ -168,17 +168,14 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, goto error_new_virtqueue; } - if (!size || size > max_num) - size = max_num; - if (ops->get_vq_num_min) min_num = ops->get_vq_num_min(vdpa); - may_reduce_num = (size == min_num) ? false : true; + may_reduce_num = (max_num == min_num) ? false : true; /* Create the vring */ align = ops->get_vq_align(vdpa); - vq = vring_create_virtqueue(index, size, align, vdev, + vq = vring_create_virtqueue(index, max_num, align, vdev, true, may_reduce_num, ctx, virtio_vdpa_notify, callback, name); if (!vq) { @@ -272,7 +269,6 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { @@ -288,9 +284,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, continue; } - vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i], - names[i], sizes ? sizes[i] : 0, - ctx ? ctx[i] : false); + vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, + callbacks[i], names[i], ctx ? + ctx[i] : false); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto err_setup_vq; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c3aecfb0a71d..993aca2f1e18 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1640,9 +1640,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); ret = btrfs_relocate_chunk(fs_info, bg->start); - if (ret) + if (ret) { + btrfs_dec_block_group_ro(bg); btrfs_err(fs_info, "error relocating chunk %llu", bg->start); + } next: btrfs_put_block_group(bg); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6e556031a8f3..ebfa35fe1c38 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2075,6 +2075,9 @@ cow_done: if (!p->skip_locking) { level = btrfs_header_level(b); + + btrfs_maybe_reset_lockdep_class(root, b); + if (level <= write_lock_level) { btrfs_tree_lock(b); p->locks[level] = BTRFS_WRITE_LOCK; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4db85b9dc7ed..4edb4bfb2166 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1173,6 +1173,8 @@ enum { BTRFS_ROOT_ORPHAN_CLEANUP, /* This root has a drop operation that was started previously. */ BTRFS_ROOT_UNFINISHED_DROP, + /* This reloc root needs to have its buffers lockdep class reset. */ + BTRFS_ROOT_RESET_LOCKDEP_CLASS, }; static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c3166f3c725..820b1f1e6b67 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -87,88 +87,6 @@ struct async_submit_bio { }; /* - * Lockdep class keys for extent_buffer->lock's in this root. For a given - * eb, the lockdep key is determined by the btrfs_root it belongs to and - * the level the eb occupies in the tree. - * - * Different roots are used for different purposes and may nest inside each - * other and they require separate keysets. As lockdep keys should be - * static, assign keysets according to the purpose of the root as indicated - * by btrfs_root->root_key.objectid. This ensures that all special purpose - * roots have separate keysets. - * - * Lock-nesting across peer nodes is always done with the immediate parent - * node locked thus preventing deadlock. As lockdep doesn't know this, use - * subclass to avoid triggering lockdep warning in such cases. - * - * The key is set by the readpage_end_io_hook after the buffer has passed - * csum validation but before the pages are unlocked. It is also set by - * btrfs_init_new_buffer on freshly allocated blocks. - * - * We also add a check to make sure the highest level of the tree is the - * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code - * needs update as well. - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# if BTRFS_MAX_LEVEL != 8 -# error -# endif - -#define DEFINE_LEVEL(stem, level) \ - .names[level] = "btrfs-" stem "-0" #level, - -#define DEFINE_NAME(stem) \ - DEFINE_LEVEL(stem, 0) \ - DEFINE_LEVEL(stem, 1) \ - DEFINE_LEVEL(stem, 2) \ - DEFINE_LEVEL(stem, 3) \ - DEFINE_LEVEL(stem, 4) \ - DEFINE_LEVEL(stem, 5) \ - DEFINE_LEVEL(stem, 6) \ - DEFINE_LEVEL(stem, 7) - -static struct btrfs_lockdep_keyset { - u64 id; /* root objectid */ - /* Longest entry: btrfs-free-space-00 */ - char names[BTRFS_MAX_LEVEL][20]; - struct lock_class_key keys[BTRFS_MAX_LEVEL]; -} btrfs_lockdep_keysets[] = { - { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, - { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, - { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, - { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, - { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, - { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, - { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, - { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, - { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, - { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, - { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, - { .id = 0, DEFINE_NAME("tree") }, -}; - -#undef DEFINE_LEVEL -#undef DEFINE_NAME - -void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, - int level) -{ - struct btrfs_lockdep_keyset *ks; - - BUG_ON(level >= ARRAY_SIZE(ks->keys)); - - /* find the matching keyset, id 0 is the default entry */ - for (ks = btrfs_lockdep_keysets; ks->id; ks++) - if (ks->id == objectid) - break; - - lockdep_set_class_and_name(&eb->lock, - &ks->keys[level], ks->names[level]); -} - -#endif - -/* * Compute the csum of a btree block and store the result to provided buffer. */ static void csum_tree_block(struct extent_buffer *buf, u8 *result) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8993b428e09c..47ad8e0a2d33 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -137,14 +137,4 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid); int btrfs_init_root_free_objectid(struct btrfs_root *root); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(u64 objectid, - struct extent_buffer *eb, int level); -#else -static inline void btrfs_set_buffer_lockdep_class(u64 objectid, - struct extent_buffer *eb, int level) -{ -} -#endif - #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ea3ec1e761e8..ab944d1f94ef 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4867,6 +4867,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *buf; + u64 lockdep_owner = owner; buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level); if (IS_ERR(buf)) @@ -4886,11 +4887,26 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* + * The reloc trees are just snapshots, so we need them to appear to be + * just like any other fs tree WRT lockdep. + * + * The exception however is in replace_path() in relocation, where we + * hold the lock on the original fs root and then search for the reloc + * root. At that point we need to make sure any reloc root buffers are + * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make + * lockdep happy. + */ + if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID && + !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) + lockdep_owner = BTRFS_FS_TREE_OBJECTID; + + /* * This needs to stay, because we could allocate a freed block from an * old tree into a new tree, so we need to make sure this new block is * set to the appropriate level and owner. */ - btrfs_set_buffer_lockdep_class(owner, buf, level); + btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level); + __btrfs_tree_lock(buf, nest); btrfs_clean_tree_block(buf); clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bfae67c593c5..eed81a7e36a4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6140,6 +6140,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *exists = NULL; struct page *p; struct address_space *mapping = fs_info->btree_inode->i_mapping; + u64 lockdep_owner = owner_root; int uptodate = 1; int ret; @@ -6164,7 +6165,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) return ERR_PTR(-ENOMEM); - btrfs_set_buffer_lockdep_class(owner_root, eb, level); + + /* + * The reloc trees are just snapshots, so we need them to appear to be + * just like any other fs tree WRT lockdep. + */ + if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID) + lockdep_owner = BTRFS_FS_TREE_OBJECTID; + + btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level); num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++, index++) { diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 33461b4f9c8b..9063072b399b 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -14,6 +14,93 @@ #include "locking.h" /* + * Lockdep class keys for extent_buffer->lock's in this root. For a given + * eb, the lockdep key is determined by the btrfs_root it belongs to and + * the level the eb occupies in the tree. + * + * Different roots are used for different purposes and may nest inside each + * other and they require separate keysets. As lockdep keys should be + * static, assign keysets according to the purpose of the root as indicated + * by btrfs_root->root_key.objectid. This ensures that all special purpose + * roots have separate keysets. + * + * Lock-nesting across peer nodes is always done with the immediate parent + * node locked thus preventing deadlock. As lockdep doesn't know this, use + * subclass to avoid triggering lockdep warning in such cases. + * + * The key is set by the readpage_end_io_hook after the buffer has passed + * csum validation but before the pages are unlocked. It is also set by + * btrfs_init_new_buffer on freshly allocated blocks. + * + * We also add a check to make sure the highest level of the tree is the + * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code + * needs update as well. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if BTRFS_MAX_LEVEL != 8 +#error +#endif + +#define DEFINE_LEVEL(stem, level) \ + .names[level] = "btrfs-" stem "-0" #level, + +#define DEFINE_NAME(stem) \ + DEFINE_LEVEL(stem, 0) \ + DEFINE_LEVEL(stem, 1) \ + DEFINE_LEVEL(stem, 2) \ + DEFINE_LEVEL(stem, 3) \ + DEFINE_LEVEL(stem, 4) \ + DEFINE_LEVEL(stem, 5) \ + DEFINE_LEVEL(stem, 6) \ + DEFINE_LEVEL(stem, 7) + +static struct btrfs_lockdep_keyset { + u64 id; /* root objectid */ + /* Longest entry: btrfs-free-space-00 */ + char names[BTRFS_MAX_LEVEL][20]; + struct lock_class_key keys[BTRFS_MAX_LEVEL]; +} btrfs_lockdep_keysets[] = { + { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, + { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, + { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, + { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, + { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, + { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, + { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, + { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, + { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, + { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, + { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, + { .id = 0, DEFINE_NAME("tree") }, +}; + +#undef DEFINE_LEVEL +#undef DEFINE_NAME + +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level) +{ + struct btrfs_lockdep_keyset *ks; + + BUG_ON(level >= ARRAY_SIZE(ks->keys)); + + /* Find the matching keyset, id 0 is the default entry */ + for (ks = btrfs_lockdep_keysets; ks->id; ks++) + if (ks->id == objectid) + break; + + lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]); +} + +void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb) +{ + if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) + btrfs_set_buffer_lockdep_class(root->root_key.objectid, + eb, btrfs_header_level(eb)); +} + +#endif + +/* * Extent buffer locking * ===================== * @@ -164,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) while (1) { eb = btrfs_root_node(root); + + btrfs_maybe_reset_lockdep_class(root, eb); btrfs_tree_lock(eb); if (eb == root->node) break; @@ -185,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) while (1) { eb = btrfs_root_node(root); + + btrfs_maybe_reset_lockdep_class(root, eb); btrfs_tree_read_lock(eb); if (eb == root->node) break; diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index bbc45534ae9a..ab268be09bb5 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -131,4 +131,18 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock); void btrfs_drew_read_lock(struct btrfs_drew_lock *lock); void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level); +void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb); +#else +static inline void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer *eb, int level) +{ +} +static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, + struct extent_buffer *eb) +{ +} +#endif + #endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a6dc827e75af..45c02aba2492 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1326,7 +1326,9 @@ again: btrfs_release_path(path); path->lowest_level = level; + set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); ret = btrfs_search_slot(trans, src, &key, path, 0, 1); + clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); path->lowest_level = 0; if (ret) { if (ret > 0) @@ -3573,7 +3575,12 @@ int prepare_to_relocate(struct reloc_control *rc) */ return PTR_ERR(trans); } - return btrfs_commit_transaction(trans); + + ret = btrfs_commit_transaction(trans); + if (ret) + unset_reloc_control(rc); + + return ret; } static noinline_for_stack int relocate_block_group(struct reloc_control *rc) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9e0e0ae2288c..43f905ab0a18 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1233,7 +1233,8 @@ static void extent_err(const struct extent_buffer *eb, int slot, } static int check_extent_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_extent_item *ei; @@ -1453,6 +1454,26 @@ static int check_extent_item(struct extent_buffer *leaf, total_refs, inline_refs); return -EUCLEAN; } + + if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || + (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { + u64 prev_end = prev_key->objectid; + + if (prev_key->type == BTRFS_METADATA_ITEM_KEY) + prev_end += fs_info->nodesize; + else + prev_end += prev_key->offset; + + if (unlikely(prev_end > key->objectid)) { + extent_err(leaf, slot, + "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", + prev_key->objectid, prev_key->type, + prev_key->offset, key->objectid, key->type, + key->offset); + return -EUCLEAN; + } + } + return 0; } @@ -1621,7 +1642,7 @@ static int check_leaf_item(struct extent_buffer *leaf, break; case BTRFS_EXTENT_ITEM_KEY: case BTRFS_METADATA_ITEM_KEY: - ret = check_extent_item(leaf, key, slot); + ret = check_extent_item(leaf, key, slot, prev_key); break; case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_DATA_REF_KEY: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dcf75a8daa20..9205c4a5ca81 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1146,7 +1146,9 @@ again: extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, inode_objectid, parent_objectid, 0, 0); - if (!IS_ERR_OR_NULL(extref)) { + if (IS_ERR(extref)) { + return PTR_ERR(extref); + } else if (extref) { u32 item_size; u32 cur_offset = 0; unsigned long base; @@ -1457,7 +1459,7 @@ static int add_link(struct btrfs_trans_handle *trans, * on the inode will not free it. We will fixup the link count later. */ if (other_inode->i_nlink == 0) - inc_nlink(other_inode); + set_nlink(other_inode, 1); add_link: ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, namelen, 0, ref_index); @@ -1600,7 +1602,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * free it. We will fixup the link count later. */ if (!ret && inode->i_nlink == 0) - inc_nlink(inode); + set_nlink(inode, 1); } if (ret < 0) goto out; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 11fd85de7217..c05477e28cff 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -42,7 +42,7 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server) smb->Command, smb->Status.CifsError, smb->Flags, smb->Flags2, smb->Mid, smb->Pid); cifs_dbg(VFS, "smb buf %p len %u\n", smb, - server->ops->calc_smb_size(smb, server)); + server->ops->calc_smb_size(smb)); #endif /* CONFIG_CIFS_DEBUG2 */ } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bc0ee2d4b47b..f15d7b0c123d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -417,7 +417,7 @@ struct smb_version_operations { int (*close_dir)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* calculate a size of SMB message */ - unsigned int (*calc_smb_size)(void *buf, struct TCP_Server_Info *ptcpi); + unsigned int (*calc_smb_size)(void *buf); /* check for STATUS_PENDING and process the response if yes */ bool (*is_status_pending)(char *buf, struct TCP_Server_Info *server); /* check for STATUS_NETWORK_SESSION_EXPIRED */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 87a77a684339..3bc94bcc7177 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -151,7 +151,7 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); -extern unsigned int smbCalcSize(void *buf, struct TCP_Server_Info *server); +extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); diff --git a/fs/cifs/cifsroot.c b/fs/cifs/cifsroot.c index 9e91a5a40aae..56ec1b233f52 100644 --- a/fs/cifs/cifsroot.c +++ b/fs/cifs/cifsroot.c @@ -59,7 +59,7 @@ static int __init cifs_root_setup(char *line) pr_err("Root-CIFS: UNC path too long\n"); return 1; } - strlcpy(root_dev, line, len); + strscpy(root_dev, line, len); srvaddr = parse_srvaddr(&line[2], s); if (*s) { int n = snprintf(root_opts, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9111c025bcb8..3da5da9f16b0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3994,7 +3994,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, } bcc_ptr += length + 1; bytes_left -= (length + 1); - strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); + strscpy(tcon->treeName, tree, sizeof(tcon->treeName)); /* mostly informational -- no need to fail on error here */ kfree(tcon->nativeFileSystem); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 34d990f06fd6..87f60f736731 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -354,7 +354,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) /* otherwise, there is enough to get to the BCC */ if (check_smb_hdr(smb)) return -EIO; - clc_len = smbCalcSize(smb, server); + clc_len = smbCalcSize(smb); if (4 + rfclen != total_read) { cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n", @@ -737,6 +737,8 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { + cifs_del_deferred_close(cfile); + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; @@ -766,6 +768,8 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) list_for_each_entry(cfile, &tcon->openFileList, tlist) { if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { + cifs_del_deferred_close(cfile); + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; @@ -799,6 +803,8 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path) if (strstr(full_path, path)) { if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { + cifs_del_deferred_close(cfile); + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 28caae7aed1b..1b52e6ac431c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -909,7 +909,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) * portion, the number of word parameters and the data portion of the message */ unsigned int -smbCalcSize(void *buf, struct TCP_Server_Info *server) +smbCalcSize(void *buf) { struct smb_hdr *ptr = buf; return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 2eece8a07c11..8e060c00c969 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -806,8 +806,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, end_of_smb = cfile->srch_inf.ntwrk_buf_start + server->ops->calc_smb_size( - cfile->srch_inf.ntwrk_buf_start, - server); + cfile->srch_inf.ntwrk_buf_start); cur_ent = cfile->srch_inf.srch_entries_start; first_entry_in_buffer = cfile->srch_inf.index_of_last_entry @@ -1161,8 +1160,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); max_len = tcon->ses->server->ops->calc_smb_size( - cifsFile->srch_inf.ntwrk_buf_start, - tcon->ses->server); + cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index f5dcc4940b6d..9dfd2dd612c2 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -61,7 +61,6 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, - true /* is_fsctl */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), CIFSMaxBufSize, NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6a6ec6efb45a..d73e5672aac4 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -222,7 +222,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) } } - calc_len = smb2_calc_size(buf, server); + calc_len = smb2_calc_size(buf); /* For SMB2_IOCTL, OutputOffset and OutputLength are optional, so might * be 0, and not a real miscalculation */ @@ -410,7 +410,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) * portion, the number of word parameters and the data portion of the message. */ unsigned int -smb2_calc_size(void *buf, struct TCP_Server_Info *srvr) +smb2_calc_size(void *buf) { struct smb2_pdu *pdu = buf; struct smb2_hdr *shdr = &pdu->hdr; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f406af596887..96f3b0573606 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -387,7 +387,7 @@ smb2_dump_detail(void *buf, struct TCP_Server_Info *server) shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId, shdr->Id.SyncId.ProcessId); cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, - server->ops->calc_smb_size(buf, server)); + server->ops->calc_smb_size(buf)); #endif } @@ -681,7 +681,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) struct cifs_ses *ses = tcon->ses; rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, - FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, + FSCTL_QUERY_NETWORK_INTERFACE_INFO, NULL /* no data input */, 0 /* no data input */, CIFSMaxBufSize, (char **)&out_buf, &ret_data_len); if (rc == -EOPNOTSUPP) { @@ -1323,9 +1323,8 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, struct resume_key_req *res_key; rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, - FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, - NULL, 0 /* no input */, CIFSMaxBufSize, - (char **)&res_key, &ret_data_len); + FSCTL_SRV_REQUEST_RESUME_KEY, NULL, 0 /* no input */, + CIFSMaxBufSize, (char **)&res_key, &ret_data_len); if (rc == -EOPNOTSUPP) { pr_warn_once("Server share %s does not support copy range\n", tcon->treeName); @@ -1467,7 +1466,7 @@ smb2_ioctl_query_info(const unsigned int xid, rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, - qi.info_type, true, buffer, qi.output_buffer_length, + qi.info_type, buffer, qi.output_buffer_length, CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - MAX_SMB2_CLOSE_RESPONSE_SIZE); free_req1_func = SMB2_ioctl_free; @@ -1643,9 +1642,8 @@ smb2_copychunk_range(const unsigned int xid, retbuf = NULL; rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, (char *)pcchunk, - sizeof(struct copychunk_ioctl), CIFSMaxBufSize, - (char **)&retbuf, &ret_data_len); + (char *)pcchunk, sizeof(struct copychunk_ioctl), + CIFSMaxBufSize, (char **)&retbuf, &ret_data_len); if (rc == 0) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) { @@ -1805,7 +1803,6 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, - true /* is_fctl */, &setsparse, 1, CIFSMaxBufSize, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; @@ -1888,7 +1885,6 @@ smb2_duplicate_extents(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, - true /* is_fsctl */, (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), CIFSMaxBufSize, NULL, @@ -1923,7 +1919,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_INTEGRITY_INFORMATION, - true /* is_fsctl */, (char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), CIFSMaxBufSize, NULL, @@ -1976,7 +1971,6 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, - true /* is_fsctl */, NULL, 0 /* no input data */, max_response_size, (char **)&retbuf, &ret_data_len); @@ -2699,7 +2693,6 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, do { rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_DFS_GET_REFERRALS, - true /* is_fsctl */, (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, (char **)&dfs_rsp, &dfs_rsp_size); if (!is_retryable_error(rc)) @@ -2906,8 +2899,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl_init(tcon, server, &rqst[1], fid.persistent_fid, - fid.volatile_fid, FSCTL_GET_REPARSE_POINT, - true /* is_fctl */, NULL, 0, + fid.volatile_fid, FSCTL_GET_REPARSE_POINT, NULL, 0, CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - MAX_SMB2_CLOSE_RESPONSE_SIZE); @@ -3087,8 +3079,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, - COMPOUND_FID, FSCTL_GET_REPARSE_POINT, - true /* is_fctl */, NULL, 0, + COMPOUND_FID, FSCTL_GET_REPARSE_POINT, NULL, 0, CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - MAX_SMB2_CLOSE_RESPONSE_SIZE); @@ -3358,7 +3349,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), 0, NULL, NULL); @@ -3421,7 +3412,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, (char *)&fsctl_buf, + (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); free_xid(xid); @@ -3481,7 +3472,7 @@ static int smb3_simple_fallocate_range(unsigned int xid, in_data.length = cpu_to_le64(len); rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, - FSCTL_QUERY_ALLOCATED_RANGES, true, + FSCTL_QUERY_ALLOCATED_RANGES, (char *)&in_data, sizeof(in_data), 1024 * sizeof(struct file_allocated_range_buffer), (char **)&out_data, &out_data_len); @@ -3802,7 +3793,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, - FSCTL_QUERY_ALLOCATED_RANGES, true, + FSCTL_QUERY_ALLOCATED_RANGES, (char *)&in_data, sizeof(in_data), sizeof(struct file_allocated_range_buffer), (char **)&out_data, &out_data_len); @@ -3862,7 +3853,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, - FSCTL_QUERY_ALLOCATED_RANGES, true, + FSCTL_QUERY_ALLOCATED_RANGES, (char *)&in_data, sizeof(in_data), 1024 * sizeof(struct file_allocated_range_buffer), (char **)&out_data, &out_data_len); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9b31ea946d45..91cfc5b47ac7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1173,7 +1173,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, - FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, + FSCTL_VALIDATE_NEGOTIATE_INFO, (char *)pneg_inbuf, inbuflen, CIFSMaxBufSize, (char **)&pneg_rsp, &rsplen); if (rc == -EOPNOTSUPP) { @@ -1928,7 +1928,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId); - strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); + strscpy(tcon->treeName, tree, sizeof(tcon->treeName)); if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) @@ -3056,7 +3056,7 @@ int SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + char *in_data, u32 indatalen, __u32 max_response_size) { struct smb2_ioctl_req *req; @@ -3131,10 +3131,8 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(max(indatalen, max_response_size), SMB2_MAX_BUFFER_SIZE)); - if (is_fsctl) - req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); - else - req->Flags = 0; + /* always an FSCTL (for now) */ + req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) @@ -3161,9 +3159,9 @@ SMB2_ioctl_free(struct smb_rqst *rqst) */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 opcode, bool is_fsctl, - char *in_data, u32 indatalen, u32 max_out_data_len, - char **out_data, u32 *plen /* returned data len */) + u64 volatile_fid, u32 opcode, char *in_data, u32 indatalen, + u32 max_out_data_len, char **out_data, + u32 *plen /* returned data len */) { struct smb_rqst rqst; struct smb2_ioctl_rsp *rsp = NULL; @@ -3205,7 +3203,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rc = SMB2_ioctl_init(tcon, server, &rqst, persistent_fid, volatile_fid, opcode, - is_fsctl, in_data, indatalen, max_out_data_len); + in_data, indatalen, max_out_data_len); if (rc) goto ioctl_exit; @@ -3297,7 +3295,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, - FSCTL_SET_COMPRESSION, true /* is_fsctl */, + FSCTL_SET_COMPRESSION, (char *)&fsctl_input /* data input */, 2 /* in data len */, CIFSMaxBufSize /* max out data */, &ret_data /* out data */, NULL); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 51c5bf4a338a..3f740f24b96a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -23,7 +23,7 @@ struct smb_rqst; extern int map_smb2_to_linux_error(char *buf, bool log_err); extern int smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *server); -extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server); +extern unsigned int smb2_calc_size(void *buf); extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, @@ -137,13 +137,13 @@ extern int SMB2_open_init(struct cifs_tcon *tcon, extern void SMB2_open_free(struct smb_rqst *rqst); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen, + char *in_data, u32 indatalen, u32 maxoutlen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + char *in_data, u32 indatalen, __u32 max_response_size); extern void SMB2_ioctl_free(struct smb_rqst *rqst); extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/dcache.c b/fs/dcache.c index c5dc32a59c76..bb0c4d0038db 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2270,6 +2270,48 @@ bool d_same_name(const struct dentry *dentry, const struct dentry *parent, } EXPORT_SYMBOL_GPL(d_same_name); +/* + * This is __d_lookup_rcu() when the parent dentry has + * DCACHE_OP_COMPARE, which makes things much nastier. + */ +static noinline struct dentry *__d_lookup_rcu_op_compare( + const struct dentry *parent, + const struct qstr *name, + unsigned *seqp) +{ + u64 hashlen = name->hash_len; + struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); + struct hlist_bl_node *node; + struct dentry *dentry; + + hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + int tlen; + const char *tname; + unsigned seq; + +seqretry: + seq = raw_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + if (dentry->d_name.hash != hashlen_hash(hashlen)) + continue; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + /* we want a consistent (name,len) pair */ + if (read_seqcount_retry(&dentry->d_seq, seq)) { + cpu_relax(); + goto seqretry; + } + if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0) + continue; + *seqp = seq; + return dentry; + } + return NULL; +} + /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry @@ -2316,6 +2358,9 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, * Keep the two functions in sync. */ + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) + return __d_lookup_rcu_op_compare(parent, name, seqp); + /* * The hash list is protected using RCU. * @@ -2332,7 +2377,6 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; -seqretry: /* * The dentry sequence count protects us from concurrent * renames, and thus protects parent and name fields. @@ -2355,28 +2399,10 @@ seqretry: continue; if (d_unhashed(dentry)) continue; - - if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { - int tlen; - const char *tname; - if (dentry->d_name.hash != hashlen_hash(hashlen)) - continue; - tlen = dentry->d_name.len; - tname = dentry->d_name.name; - /* we want a consistent (name,len) pair */ - if (read_seqcount_retry(&dentry->d_seq, seq)) { - cpu_relax(); - goto seqretry; - } - if (parent->d_op->d_compare(dentry, - tlen, tname, name) != 0) - continue; - } else { - if (dentry->d_name.hash_len != hashlen) - continue; - if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) - continue; - } + if (dentry->d_name.hash_len != hashlen) + continue; + if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) + continue; *seqp = seq; return dentry; } diff --git a/fs/exec.c b/fs/exec.c index f793221f4eb6..9a5ca7b82bfc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -584,11 +584,11 @@ static int copy_strings(int argc, struct user_arg_ptr argv, if (kmapped_page) { flush_dcache_page(kmapped_page); - kunmap(kmapped_page); + kunmap_local(kaddr); put_arg_page(kmapped_page); } kmapped_page = page; - kaddr = kmap(kmapped_page); + kaddr = kmap_local_page(kmapped_page); kpos = pos & PAGE_MASK; flush_arg_page(bprm, kpos, kmapped_page); } @@ -602,7 +602,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, out: if (kmapped_page) { flush_dcache_page(kmapped_page); - kunmap(kmapped_page); + kunmap_local(kaddr); put_arg_page(kmapped_page); } return ret; @@ -880,11 +880,11 @@ int transfer_args_to_stack(struct linux_binprm *bprm, for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0; - char *src = kmap(bprm->page[index]) + offset; + char *src = kmap_local_page(bprm->page[index]) + offset; sp -= PAGE_SIZE - offset; if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ret = -EFAULT; - kunmap(bprm->page[index]); + kunmap_local(src); if (ret) goto out; } @@ -1686,13 +1686,13 @@ int remove_arg_zero(struct linux_binprm *bprm) ret = -EFAULT; goto out; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; - kunmap_atomic(kaddr); + kunmap_local(kaddr); put_arg_page(page); } while (offset == PAGE_SIZE); diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index 52aa0adeb951..e0cbcfa98c7e 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -349,6 +349,7 @@ enum KSMBD_TREE_CONN_STATUS { #define KSMBD_SHARE_FLAG_STREAMS BIT(11) #define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) #define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) /* * Tree connect request flags. @@ -364,6 +365,7 @@ enum KSMBD_TREE_CONN_STATUS { #define KSMBD_TREE_CONN_FLAG_READ_ONLY BIT(1) #define KSMBD_TREE_CONN_FLAG_WRITABLE BIT(2) #define KSMBD_TREE_CONN_FLAG_ADMIN_ACCOUNT BIT(3) +#define KSMBD_TREE_CONN_FLAG_UPDATE BIT(4) /* * RPC over IPC. diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c index 70655af93b44..c9bca1c2c834 100644 --- a/fs/ksmbd/mgmt/share_config.c +++ b/fs/ksmbd/mgmt/share_config.c @@ -51,12 +51,16 @@ static void kill_share(struct ksmbd_share_config *share) kfree(share); } -void __ksmbd_share_config_put(struct ksmbd_share_config *share) +void ksmbd_share_config_del(struct ksmbd_share_config *share) { down_write(&shares_table_lock); hash_del(&share->hlist); up_write(&shares_table_lock); +} +void __ksmbd_share_config_put(struct ksmbd_share_config *share) +{ + ksmbd_share_config_del(share); kill_share(share); } diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h index 28bf3511763f..902f2cb1963a 100644 --- a/fs/ksmbd/mgmt/share_config.h +++ b/fs/ksmbd/mgmt/share_config.h @@ -64,6 +64,7 @@ static inline int test_share_config_flag(struct ksmbd_share_config *share, return share->flags & flag; } +void ksmbd_share_config_del(struct ksmbd_share_config *share); void __ksmbd_share_config_put(struct ksmbd_share_config *share); static inline void ksmbd_share_config_put(struct ksmbd_share_config *share) diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c index b35ea6a6abc5..97ab7987df6e 100644 --- a/fs/ksmbd/mgmt/tree_connect.c +++ b/fs/ksmbd/mgmt/tree_connect.c @@ -19,7 +19,7 @@ struct ksmbd_tree_conn_status ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, char *share_name) { - struct ksmbd_tree_conn_status status = {-EINVAL, NULL}; + struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; @@ -57,6 +57,20 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, goto out_error; tree_conn->flags = resp->connection_flags; + if (test_tree_conn_flag(tree_conn, KSMBD_TREE_CONN_FLAG_UPDATE)) { + struct ksmbd_share_config *new_sc; + + ksmbd_share_config_del(sc); + new_sc = ksmbd_share_config_get(share_name); + if (!new_sc) { + pr_err("Failed to update stale share config\n"); + status.ret = -ESTALE; + goto out_error; + } + ksmbd_share_config_put(sc); + sc = new_sc; + } + tree_conn->user = sess->user; tree_conn->share_conf = sc; status.tree_conn = tree_conn; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 9751cc92c111..19412ac701a6 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1944,8 +1944,10 @@ out_err1: rsp->hdr.Status = STATUS_SUCCESS; rc = 0; break; + case -ESTALE: + case -ENOENT: case KSMBD_TREE_CONN_STATUS_NO_SHARE: - rsp->hdr.Status = STATUS_BAD_NETWORK_PATH; + rsp->hdr.Status = STATUS_BAD_NETWORK_NAME; break; case -ENOMEM: case KSMBD_TREE_CONN_STATUS_NOMEM: @@ -2328,15 +2330,15 @@ static int smb2_remove_smb_xattrs(struct path *path) name += strlen(name) + 1) { ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); - if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && - strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX, - DOS_ATTRIBUTE_PREFIX_LEN) && - strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) - continue; - - err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name); - if (err) - ksmbd_debug(SMB, "remove xattr failed : %s\n", name); + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, + STREAM_PREFIX_LEN)) { + err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, + name); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", + name); + } } out: kvfree(xattr_list); @@ -3042,12 +3044,6 @@ int smb2_open(struct ksmbd_work *work) list_add(&fp->node, &fp->f_ci->m_fp_list); write_unlock(&fp->f_ci->m_lock); - rc = ksmbd_vfs_getattr(&path, &stat); - if (rc) { - generic_fillattr(user_ns, d_inode(path.dentry), &stat); - rc = 0; - } - /* Check delete pending among previous fp before oplock break */ if (ksmbd_inode_pending_delete(fp)) { rc = -EBUSY; @@ -3134,6 +3130,10 @@ int smb2_open(struct ksmbd_work *work) } } + rc = ksmbd_vfs_getattr(&path, &stat); + if (rc) + goto err_out; + if (stat.result_mask & STATX_BTIME) fp->create_time = ksmbd_UnixTimeToNT(stat.btime); else @@ -3149,9 +3149,6 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); - generic_fillattr(user_ns, file_inode(fp->filp), - &stat); - rsp->StructureSize = cpu_to_le16(89); rcu_read_lock(); opinfo = rcu_dereference(fp->f_opinfo); diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index e8c00dda42ad..71f870d497ae 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -84,8 +84,8 @@ static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi, /* * attr_load_runs - Load all runs stored in @attr. */ -int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, - struct runs_tree *run, const CLST *vcn) +static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, + struct runs_tree *run, const CLST *vcn) { int err; CLST svcn = le64_to_cpu(attr->nres.svcn); @@ -140,7 +140,10 @@ failed: } if (lcn != SPARSE_LCN) { - mark_as_free_ex(sbi, lcn, clen, trim); + if (sbi) { + /* mark bitmap range [lcn + clen) as free and trim clusters. */ + mark_as_free_ex(sbi, lcn, clen, trim); + } dn += clen; } @@ -173,7 +176,6 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, { int err; CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; - struct wnd_bitmap *wnd = &sbi->used.bitmap; size_t cnt = run->count; for (;;) { @@ -196,9 +198,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, /* Add new fragment into run storage. */ if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) { /* Undo last 'ntfs_look_for_free_space' */ - down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); - wnd_set_free(wnd, lcn, flen); - up_write(&wnd->rw_lock); + mark_as_free_ex(sbi, lcn, len, false); err = -ENOMEM; goto out; } @@ -320,7 +320,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, err = ni_insert_nonresident(ni, attr_s->type, attr_name(attr_s), attr_s->name_len, run, 0, alen, - attr_s->flags, &attr, NULL); + attr_s->flags, &attr, NULL, NULL); if (err) goto out3; @@ -419,40 +419,44 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, struct mft_inode *mi, *mi_b; CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; CLST next_svcn, pre_alloc = -1, done = 0; - bool is_ext; + bool is_ext, is_bad = false; u32 align; struct MFT_REC *rec; again: + alen = 0; le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL, &mi_b); if (!attr_b) { err = -ENOENT; - goto out; + goto bad_inode; } if (!attr_b->non_res) { err = attr_set_size_res(ni, attr_b, le_b, mi_b, new_size, run, &attr_b); - if (err || !attr_b->non_res) - goto out; + if (err) + return err; + + /* Return if file is still resident. */ + if (!attr_b->non_res) + goto ok1; /* Layout of records may be changed, so do a full search. */ goto again; } is_ext = is_attr_ext(attr_b); - -again_1: align = sbi->cluster_size; - if (is_ext) align <<= attr_b->nres.c_unit; old_valid = le64_to_cpu(attr_b->nres.valid_size); old_size = le64_to_cpu(attr_b->nres.data_size); old_alloc = le64_to_cpu(attr_b->nres.alloc_size); + +again_1: old_alen = old_alloc >> cluster_bits; new_alloc = (new_size + align - 1) & ~(u64)(align - 1); @@ -475,24 +479,27 @@ again_1: mi = mi_b; } else if (!le_b) { err = -EINVAL; - goto out; + goto bad_inode; } else { le = le_b; attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, &vcn, &mi); if (!attr) { err = -EINVAL; - goto out; + goto bad_inode; } next_le_1: svcn = le64_to_cpu(attr->nres.svcn); evcn = le64_to_cpu(attr->nres.evcn); } - + /* + * Here we have: + * attr,mi,le - last attribute segment (containing 'vcn'). + * attr_b,mi_b,le_b - base (primary) attribute segment. + */ next_le: rec = mi->mrec; - err = attr_load_runs(attr, ni, run, NULL); if (err) goto out; @@ -507,6 +514,13 @@ next_le: goto ok; } + /* + * Add clusters. In simple case we have to: + * - allocate space (vcn, lcn, len) + * - update packed run in 'mi' + * - update attr->nres.evcn + * - update attr_b->nres.data_size/attr_b->nres.alloc_size + */ to_allocate = new_alen - old_alen; add_alloc_in_same_attr_seg: lcn = 0; @@ -520,9 +534,11 @@ add_alloc_in_same_attr_seg: pre_alloc = 0; if (type == ATTR_DATA && !name_len && sbi->options->prealloc) { - CLST new_alen2 = bytes_to_cluster( - sbi, get_pre_allocated(new_size)); - pre_alloc = new_alen2 - new_alen; + pre_alloc = + bytes_to_cluster( + sbi, + get_pre_allocated(new_size)) - + new_alen; } /* Get the last LCN to allocate from. */ @@ -580,7 +596,7 @@ add_alloc_in_same_attr_seg: pack_runs: err = mi_pack_runs(mi, attr, run, vcn - svcn); if (err) - goto out; + goto undo_1; next_svcn = le64_to_cpu(attr->nres.evcn) + 1; new_alloc_tmp = (u64)next_svcn << cluster_bits; @@ -614,7 +630,7 @@ pack_runs: if (type == ATTR_LIST) { err = ni_expand_list(ni); if (err) - goto out; + goto undo_2; if (next_svcn < vcn) goto pack_runs; @@ -624,8 +640,9 @@ pack_runs: if (!ni->attr_list.size) { err = ni_create_attr_list(ni); + /* In case of error layout of records is not changed. */ if (err) - goto out; + goto undo_2; /* Layout of records is changed. */ } @@ -637,48 +654,57 @@ pack_runs: /* Insert new attribute segment. */ err = ni_insert_nonresident(ni, type, name, name_len, run, next_svcn, vcn - next_svcn, - attr_b->flags, &attr, &mi); - if (err) - goto out; - - if (!is_mft) - run_truncate_head(run, evcn + 1); - - svcn = le64_to_cpu(attr->nres.svcn); - evcn = le64_to_cpu(attr->nres.evcn); + attr_b->flags, &attr, &mi, NULL); - le_b = NULL; /* * Layout of records maybe changed. * Find base attribute to update. */ + le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL, &mi_b); if (!attr_b) { - err = -ENOENT; - goto out; + err = -EINVAL; + goto bad_inode; } - attr_b->nres.alloc_size = cpu_to_le64((u64)vcn << cluster_bits); - attr_b->nres.data_size = attr_b->nres.alloc_size; - attr_b->nres.valid_size = attr_b->nres.alloc_size; + if (err) { + /* ni_insert_nonresident failed. */ + attr = NULL; + goto undo_2; + } + + if (!is_mft) + run_truncate_head(run, evcn + 1); + + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + + /* + * Attribute is in consistency state. + * Save this point to restore to if next steps fail. + */ + old_valid = old_size = old_alloc = (u64)vcn << cluster_bits; + attr_b->nres.valid_size = attr_b->nres.data_size = + attr_b->nres.alloc_size = cpu_to_le64(old_size); mi_b->dirty = true; goto again_1; } if (new_size != old_size || (new_alloc != old_alloc && !keep_prealloc)) { + /* + * Truncate clusters. In simple case we have to: + * - update packed run in 'mi' + * - update attr->nres.evcn + * - update attr_b->nres.data_size/attr_b->nres.alloc_size + * - mark and trim clusters as free (vcn, lcn, len) + */ + CLST dlen = 0; + vcn = max(svcn, new_alen); new_alloc_tmp = (u64)vcn << cluster_bits; - alen = 0; - err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen, - true); - if (err) - goto out; - - run_truncate(run, vcn); - if (vcn > svcn) { err = mi_pack_runs(mi, attr, run, vcn - svcn); if (err) @@ -697,7 +723,7 @@ pack_runs: if (!al_remove_le(ni, le)) { err = -EINVAL; - goto out; + goto bad_inode; } le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz); @@ -723,12 +749,20 @@ pack_runs: attr_b->nres.valid_size = attr_b->nres.alloc_size; } + mi_b->dirty = true; - if (is_ext) + err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen, + true); + if (err) + goto out; + + if (is_ext) { + /* dlen - really deallocated clusters. */ le64_sub_cpu(&attr_b->nres.total_size, - ((u64)alen << cluster_bits)); + ((u64)dlen << cluster_bits)); + } - mi_b->dirty = true; + run_truncate(run, vcn); if (new_alloc_tmp <= new_alloc) goto ok; @@ -747,7 +781,7 @@ pack_runs: if (le->type != type || le->name_len != name_len || memcmp(le_name(le), name, name_len * sizeof(short))) { err = -EINVAL; - goto out; + goto bad_inode; } err = ni_load_mi(ni, le, &mi); @@ -757,7 +791,7 @@ pack_runs: attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); if (!attr) { err = -EINVAL; - goto out; + goto bad_inode; } goto next_le_1; } @@ -772,13 +806,13 @@ ok: } } -out: - if (!err && attr_b && ret) +ok1: + if (ret) *ret = attr_b; /* Update inode_set_bytes. */ - if (!err && ((type == ATTR_DATA && !name_len) || - (type == ATTR_ALLOC && name == I30_NAME))) { + if (((type == ATTR_DATA && !name_len) || + (type == ATTR_ALLOC && name == I30_NAME))) { bool dirty = false; if (ni->vfs_inode.i_size != new_size) { @@ -786,7 +820,7 @@ out: dirty = true; } - if (attr_b && attr_b->non_res) { + if (attr_b->non_res) { new_alloc = le64_to_cpu(attr_b->nres.alloc_size); if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { inode_set_bytes(&ni->vfs_inode, new_alloc); @@ -800,6 +834,47 @@ out: } } + return 0; + +undo_2: + vcn -= alen; + attr_b->nres.data_size = cpu_to_le64(old_size); + attr_b->nres.valid_size = cpu_to_le64(old_valid); + attr_b->nres.alloc_size = cpu_to_le64(old_alloc); + + /* Restore 'attr' and 'mi'. */ + if (attr) + goto restore_run; + + if (le64_to_cpu(attr_b->nres.svcn) <= svcn && + svcn <= le64_to_cpu(attr_b->nres.evcn)) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto bad_inode; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, + &svcn, &mi); + if (!attr) + goto bad_inode; + } + +restore_run: + if (mi_pack_runs(mi, attr, run, evcn - svcn + 1)) + is_bad = true; + +undo_1: + run_deallocate_ex(sbi, run, vcn, alen, NULL, false); + + run_truncate(run, vcn); +out: + if (is_bad) { +bad_inode: + _ntfs_bad_inode(&ni->vfs_inode); + } return err; } @@ -855,7 +930,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, goto out; } - asize = le64_to_cpu(attr_b->nres.alloc_size) >> sbi->cluster_bits; + asize = le64_to_cpu(attr_b->nres.alloc_size) >> cluster_bits; if (vcn >= asize) { err = -EINVAL; goto out; @@ -1047,7 +1122,7 @@ ins_ext: if (evcn1 > next_svcn) { err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, next_svcn, evcn1 - next_svcn, - attr_b->flags, &attr, &mi); + attr_b->flags, &attr, &mi, NULL); if (err) goto out; } @@ -1173,7 +1248,7 @@ int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, { struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; - CLST vcn = from >> cluster_bits; + CLST vcn; CLST vcn_last = (to - 1) >> cluster_bits; CLST lcn, clen; int err; @@ -1647,7 +1722,7 @@ ins_ext: if (evcn1 > next_svcn) { err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, next_svcn, evcn1 - next_svcn, - attr_b->flags, &attr, &mi); + attr_b->flags, &attr, &mi, NULL); if (err) goto out; } @@ -1812,18 +1887,12 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) err = ni_insert_nonresident( ni, ATTR_DATA, NULL, 0, run, next_svcn, evcn1 - eat - next_svcn, a_flags, &attr, - &mi); + &mi, &le); if (err) goto out; /* Layout of records maybe changed. */ attr_b = NULL; - le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0, - &next_svcn); - if (!le) { - err = -EINVAL; - goto out; - } } /* Free all allocated memory. */ @@ -1918,7 +1987,7 @@ next_attr: out: up_write(&ni->file.run_lock); if (err) - make_bad_inode(&ni->vfs_inode); + _ntfs_bad_inode(&ni->vfs_inode); return err; } @@ -1936,9 +2005,11 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) struct ATTRIB *attr = NULL, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; - CLST svcn, evcn1, vcn, len, end, alen, dealloc; + CLST svcn, evcn1, vcn, len, end, alen, hole, next_svcn; u64 total_size, alloc_size; u32 mask; + __le16 a_flags; + struct runs_tree run2; if (!bytes) return 0; @@ -1990,6 +2061,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) } down_write(&ni->file.run_lock); + run_init(&run2); + run_truncate(run, 0); + /* * Enumerate all attribute segments and punch hole where necessary. */ @@ -1997,10 +2071,11 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) vcn = vbo >> sbi->cluster_bits; len = bytes >> sbi->cluster_bits; end = vcn + len; - dealloc = 0; + hole = 0; svcn = le64_to_cpu(attr_b->nres.svcn); evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + a_flags = attr_b->flags; if (svcn <= vcn && vcn < evcn1) { attr = attr_b; @@ -2008,14 +2083,14 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) mi = mi_b; } else if (!le_b) { err = -EINVAL; - goto out; + goto bad_inode; } else { le = le_b; attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, &mi); if (!attr) { err = -EINVAL; - goto out; + goto bad_inode; } svcn = le64_to_cpu(attr->nres.svcn); @@ -2023,49 +2098,91 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) } while (svcn < end) { - CLST vcn1, zero, dealloc2; + CLST vcn1, zero, hole2 = hole; err = attr_load_runs(attr, ni, run, &svcn); if (err) - goto out; + goto done; vcn1 = max(vcn, svcn); zero = min(end, evcn1) - vcn1; - dealloc2 = dealloc; - err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true); + /* + * Check range [vcn1 + zero). + * Calculate how many clusters there are. + * Don't do any destructive actions. + */ + err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false); if (err) - goto out; + goto done; - if (dealloc2 == dealloc) { - /* Looks like the required range is already sparsed. */ - } else { - if (!run_add_entry(run, vcn1, SPARSE_LCN, zero, - false)) { - err = -ENOMEM; - goto out; - } + /* Check if required range is already hole. */ + if (hole2 == hole) + goto next_attr; + + /* Make a clone of run to undo. */ + err = run_clone(run, &run2); + if (err) + goto done; + + /* Make a hole range (sparse) [vcn1 + zero). */ + if (!run_add_entry(run, vcn1, SPARSE_LCN, zero, false)) { + err = -ENOMEM; + goto done; + } - err = mi_pack_runs(mi, attr, run, evcn1 - svcn); + /* Update run in attribute segment. */ + err = mi_pack_runs(mi, attr, run, evcn1 - svcn); + if (err) + goto done; + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + if (next_svcn < evcn1) { + /* Insert new attribute segment. */ + err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, + next_svcn, + evcn1 - next_svcn, a_flags, + &attr, &mi, &le); if (err) - goto out; + goto undo_punch; + + /* Layout of records maybe changed. */ + attr_b = NULL; } + + /* Real deallocate. Should not fail. */ + run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true); + +next_attr: /* Free all allocated memory. */ run_truncate(run, 0); if (evcn1 >= alen) break; + /* Get next attribute segment. */ attr = ni_enum_attr_ex(ni, attr, &le, &mi); if (!attr) { err = -EINVAL; - goto out; + goto bad_inode; } svcn = le64_to_cpu(attr->nres.svcn); evcn1 = le64_to_cpu(attr->nres.evcn) + 1; } - total_size -= (u64)dealloc << sbi->cluster_bits; +done: + if (!hole) + goto out; + + if (!attr_b) { + attr_b = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b) { + err = -EINVAL; + goto bad_inode; + } + } + + total_size -= (u64)hole << sbi->cluster_bits; attr_b->nres.total_size = cpu_to_le64(total_size); mi_b->dirty = true; @@ -2075,9 +2192,263 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) mark_inode_dirty(&ni->vfs_inode); out: + run_close(&run2); up_write(&ni->file.run_lock); + return err; + +bad_inode: + _ntfs_bad_inode(&ni->vfs_inode); + goto out; + +undo_punch: + /* + * Restore packed runs. + * 'mi_pack_runs' should not fail, cause we restore original. + */ + if (mi_pack_runs(mi, attr, &run2, evcn1 - svcn)) + goto bad_inode; + + goto done; +} + +/* + * attr_insert_range - Insert range (hole) in file. + * Not for normal files. + */ +int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST vcn, svcn, evcn1, len, next_svcn; + u64 data_size, alloc_size; + u32 mask; + __le16 a_flags; + + if (!bytes) + return 0; + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); + if (!attr_b) + return -ENOENT; + + if (!is_attr_ext(attr_b)) { + /* It was checked above. See fallocate. */ + return -EOPNOTSUPP; + } + + if (!attr_b->non_res) { + data_size = le32_to_cpu(attr_b->res.data_size); + alloc_size = data_size; + mask = sbi->cluster_mask; /* cluster_size - 1 */ + } else { + data_size = le64_to_cpu(attr_b->nres.data_size); + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; + } + + if (vbo > data_size) { + /* Insert range after the file size is not allowed. */ + return -EINVAL; + } + + if ((vbo & mask) || (bytes & mask)) { + /* Allow to insert only frame aligned ranges. */ + return -EINVAL; + } + + /* + * valid_size <= data_size <= alloc_size + * Check alloc_size for maximum possible. + */ + if (bytes > sbi->maxbytes_sparse - alloc_size) + return -EFBIG; + + vcn = vbo >> sbi->cluster_bits; + len = bytes >> sbi->cluster_bits; + + down_write(&ni->file.run_lock); + + if (!attr_b->non_res) { + err = attr_set_size(ni, ATTR_DATA, NULL, 0, run, + data_size + bytes, NULL, false, NULL); + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b) { + err = -EINVAL; + goto bad_inode; + } + + if (err) + goto out; + + if (!attr_b->non_res) { + /* Still resident. */ + char *data = Add2Ptr(attr_b, attr_b->res.data_off); + + memmove(data + bytes, data, bytes); + memset(data, 0, bytes); + goto done; + } + + /* Resident files becomes nonresident. */ + data_size = le64_to_cpu(attr_b->nres.data_size); + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + } + + /* + * Enumerate all attribute segments and shift start vcn. + */ + a_flags = attr_b->flags; + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + + if (svcn <= vcn && vcn < evcn1) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto bad_inode; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto bad_inode; + } + + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + run_truncate(run, 0); /* clear cached values. */ + err = attr_load_runs(attr, ni, run, NULL); + if (err) + goto out; + + if (!run_insert_range(run, vcn, len)) { + err = -ENOMEM; + goto out; + } + + /* Try to pack in current record as much as possible. */ + err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn); if (err) - make_bad_inode(&ni->vfs_inode); + goto out; + + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + + while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi)) && + attr->type == ATTR_DATA && !attr->name_len) { + le64_add_cpu(&attr->nres.svcn, len); + le64_add_cpu(&attr->nres.evcn, len); + if (le) { + le->vcn = attr->nres.svcn; + ni->attr_list.dirty = true; + } + mi->dirty = true; + } + + if (next_svcn < evcn1 + len) { + err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, + next_svcn, evcn1 + len - next_svcn, + a_flags, NULL, NULL, NULL); + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b) { + err = -EINVAL; + goto bad_inode; + } + + if (err) { + /* ni_insert_nonresident failed. Try to undo. */ + goto undo_insert_range; + } + } + + /* + * Update primary attribute segment. + */ + if (vbo <= ni->i_valid) + ni->i_valid += bytes; + + attr_b->nres.data_size = le64_to_cpu(data_size + bytes); + attr_b->nres.alloc_size = le64_to_cpu(alloc_size + bytes); + + /* ni->valid may be not equal valid_size (temporary). */ + if (ni->i_valid > data_size + bytes) + attr_b->nres.valid_size = attr_b->nres.data_size; + else + attr_b->nres.valid_size = cpu_to_le64(ni->i_valid); + mi_b->dirty = true; + +done: + ni->vfs_inode.i_size += bytes; + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + mark_inode_dirty(&ni->vfs_inode); + +out: + run_truncate(run, 0); /* clear cached values. */ + + up_write(&ni->file.run_lock); return err; + +bad_inode: + _ntfs_bad_inode(&ni->vfs_inode); + goto out; + +undo_insert_range: + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + + if (svcn <= vcn && vcn < evcn1) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + goto bad_inode; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + goto bad_inode; + } + + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + if (attr_load_runs(attr, ni, run, NULL)) + goto bad_inode; + + if (!run_collapse_range(run, vcn, len)) + goto bad_inode; + + if (mi_pack_runs(mi, attr, run, evcn1 + len - svcn)) + goto bad_inode; + + while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi)) && + attr->type == ATTR_DATA && !attr->name_len) { + le64_sub_cpu(&attr->nres.svcn, len); + le64_sub_cpu(&attr->nres.evcn, len); + if (le) { + le->vcn = attr->nres.svcn; + ni->attr_list.dirty = true; + } + mi->dirty = true; + } + + goto out; } diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index aa184407520f..5d44ceac855b 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -51,11 +51,6 @@ void ntfs3_exit_bitmap(void) kmem_cache_destroy(ntfs_enode_cachep); } -static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i) -{ - return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8; -} - /* * wnd_scan * @@ -1333,9 +1328,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) if (!new_free) return -ENOMEM; - if (new_free != wnd->free_bits) - memcpy(new_free, wnd->free_bits, - wnd->nwnd * sizeof(short)); + memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); kfree(wnd->free_bits); @@ -1395,9 +1388,8 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) { - size_t zlen; + size_t zlen = wnd->zone_end - wnd->zone_bit; - zlen = wnd->zone_end - wnd->zone_bit; if (zlen) wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 4a21745711fe..4f2ffc7ef296 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -530,21 +530,35 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) { struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct super_block *sb = inode->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; struct ntfs_inode *ni = ntfs_i(inode); loff_t end = vbo + len; loff_t vbo_down = round_down(vbo, PAGE_SIZE); - loff_t i_size; + bool is_supported_holes = is_sparsed(ni) || is_compressed(ni); + loff_t i_size, new_size; + bool map_locked; int err; /* No support for dir. */ if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - /* Return error if mode is not supported. */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE)) { + /* + * vfs_fallocate checks all possible combinations of mode. + * Do additional checks here before ntfs_set_state(dirty). + */ + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!is_supported_holes) + return -EOPNOTSUPP; + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + } else if (mode & FALLOC_FL_INSERT_RANGE) { + if (!is_supported_holes) + return -EOPNOTSUPP; + } else if (mode & + ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) { ntfs_inode_warn(inode, "fallocate(0x%x) is not supported", mode); return -EOPNOTSUPP; @@ -554,6 +568,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) inode_lock(inode); i_size = inode->i_size; + new_size = max(end, i_size); + map_locked = false; if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open. */ @@ -561,38 +577,27 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) goto out; } + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_INSERT_RANGE)) { + inode_dio_wait(inode); + filemap_invalidate_lock(mapping); + map_locked = true; + } + if (mode & FALLOC_FL_PUNCH_HOLE) { u32 frame_size; loff_t mask, vbo_a, end_a, tmp; - if (!(mode & FALLOC_FL_KEEP_SIZE)) { - err = -EINVAL; - goto out; - } - - err = filemap_write_and_wait_range(inode->i_mapping, vbo, - end - 1); + err = filemap_write_and_wait_range(mapping, vbo, end - 1); if (err) goto out; - err = filemap_write_and_wait_range(inode->i_mapping, end, - LLONG_MAX); + err = filemap_write_and_wait_range(mapping, end, LLONG_MAX); if (err) goto out; - inode_dio_wait(inode); - truncate_pagecache(inode, vbo_down); - if (!is_sparsed(ni) && !is_compressed(ni)) { - /* - * Normal file, can't make hole. - * TODO: Try to find way to save info about hole. - */ - err = -EOPNOTSUPP; - goto out; - } - ni_lock(ni); err = attr_punch_hole(ni, vbo, len, &frame_size); ni_unlock(ni); @@ -624,17 +629,11 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_unlock(ni); } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { - if (mode & ~FALLOC_FL_COLLAPSE_RANGE) { - err = -EINVAL; - goto out; - } - /* * Write tail of the last page before removed range since * it will get removed from the page cache below. */ - err = filemap_write_and_wait_range(inode->i_mapping, vbo_down, - vbo); + err = filemap_write_and_wait_range(mapping, vbo_down, vbo); if (err) goto out; @@ -642,34 +641,58 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) * Write data that will be shifted to preserve them * when discarding page cache below. */ - err = filemap_write_and_wait_range(inode->i_mapping, end, - LLONG_MAX); + err = filemap_write_and_wait_range(mapping, end, LLONG_MAX); if (err) goto out; - /* Wait for existing dio to complete. */ - inode_dio_wait(inode); - truncate_pagecache(inode, vbo_down); ni_lock(ni); err = attr_collapse_range(ni, vbo, len); ni_unlock(ni); + } else if (mode & FALLOC_FL_INSERT_RANGE) { + /* Check new size. */ + err = inode_newsize_ok(inode, new_size); + if (err) + goto out; + + /* Write out all dirty pages. */ + err = filemap_write_and_wait_range(mapping, vbo_down, + LLONG_MAX); + if (err) + goto out; + truncate_pagecache(inode, vbo_down); + + ni_lock(ni); + err = attr_insert_range(ni, vbo, len); + ni_unlock(ni); } else { - /* - * Normal file: Allocate clusters, do not change 'valid' size. - */ - loff_t new_size = max(end, i_size); + /* Check new size. */ + + /* generic/213: expected -ENOSPC instead of -EFBIG. */ + if (!is_supported_holes) { + loff_t to_alloc = new_size - inode_get_bytes(inode); + + if (to_alloc > 0 && + (to_alloc >> sbi->cluster_bits) > + wnd_zeroes(&sbi->used.bitmap)) { + err = -ENOSPC; + goto out; + } + } err = inode_newsize_ok(inode, new_size); if (err) goto out; + /* + * Allocate clusters, do not change 'valid' size. + */ err = ntfs_set_size(inode, new_size); if (err) goto out; - if (is_sparsed(ni) || is_compressed(ni)) { + if (is_supported_holes) { CLST vcn_v = ni->i_valid >> sbi->cluster_bits; CLST vcn = vbo >> sbi->cluster_bits; CLST cend = bytes_to_cluster(sbi, end); @@ -717,8 +740,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) } out: - if (err == -EFBIG) - err = -ENOSPC; + if (map_locked) + filemap_invalidate_unlock(mapping); if (!err) { inode->i_ctime = inode->i_mtime = current_time(inode); @@ -989,7 +1012,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) if (bytes > count) bytes = count; - frame = pos >> frame_bits; frame_vbo = pos & ~(frame_size - 1); index = frame_vbo >> PAGE_SHIFT; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 18842998c8fa..381a38a06ec2 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -7,6 +7,7 @@ #include <linux/fiemap.h> #include <linux/fs.h> +#include <linux/minmax.h> #include <linux/vmalloc.h> #include "debug.h" @@ -468,7 +469,7 @@ ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi, &ref, &le); if (err) { /* No memory or no space. */ - return NULL; + return ERR_PTR(err); } le_added = true; @@ -649,6 +650,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) struct mft_inode *mi; u32 asize, free; struct MFT_REF ref; + struct MFT_REC *mrec; __le16 id; if (!ni->attr_list.dirty) @@ -692,11 +694,17 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) free -= asize; } + /* Make a copy of primary record to restore if error. */ + mrec = kmemdup(ni->mi.mrec, sbi->record_size, GFP_NOFS); + if (!mrec) + return 0; /* Not critical. */ + /* It seems that attribute list can be removed from primary record. */ mi_remove_attr(NULL, &ni->mi, attr_list); /* - * Repeat the cycle above and move all attributes to primary record. + * Repeat the cycle above and copy all attributes to primary record. + * Do not remove original attributes from subrecords! * It should be success! */ le = NULL; @@ -707,14 +715,14 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) mi = ni_find_mi(ni, ino_get(&le->ref)); if (!mi) { /* Should never happened, 'cause already checked. */ - goto bad; + goto out; } attr = mi_find_attr(mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) { /* Should never happened, 'cause already checked. */ - goto bad; + goto out; } asize = le32_to_cpu(attr->size); @@ -724,18 +732,33 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) le16_to_cpu(attr->name_off)); if (!attr_ins) { /* - * Internal error. - * Either no space in primary record (already checked). - * Either tried to insert another - * non indexed attribute (logic error). + * No space in primary record (already checked). */ - goto bad; + goto out; } /* Copy all except id. */ id = attr_ins->id; memcpy(attr_ins, attr, asize); attr_ins->id = id; + } + + /* + * Repeat the cycle above and remove all attributes from subrecords. + */ + le = NULL; + while ((le = al_enumerate(ni, le))) { + if (!memcmp(&le->ref, &ref, sizeof(ref))) + continue; + + mi = ni_find_mi(ni, ino_get(&le->ref)); + if (!mi) + continue; + + attr = mi_find_attr(mi, NULL, le->type, le_name(le), + le->name_len, &le->id); + if (!attr) + continue; /* Remove from original record. */ mi_remove_attr(NULL, mi, attr); @@ -748,11 +771,13 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) ni->attr_list.le = NULL; ni->attr_list.dirty = false; + kfree(mrec); + return 0; +out: + /* Restore primary record. */ + swap(mrec, ni->mi.mrec); + kfree(mrec); return 0; -bad: - ntfs_inode_err(&ni->vfs_inode, "Internal error"); - make_bad_inode(&ni->vfs_inode); - return -EINVAL; } /* @@ -986,6 +1011,8 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, name_off, svcn, ins_le); if (!attr) continue; + if (IS_ERR(attr)) + return PTR_ERR(attr); if (ins_attr) *ins_attr = attr; @@ -1007,8 +1034,15 @@ insert_ext: attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize, name_off, svcn, ins_le); - if (!attr) + if (!attr) { + err = -EINVAL; goto out2; + } + + if (IS_ERR(attr)) { + err = PTR_ERR(attr); + goto out2; + } if (ins_attr) *ins_attr = attr; @@ -1020,10 +1054,9 @@ insert_ext: out2: ni_remove_mi(ni, mi); mi_put(mi); - err = -EINVAL; out1: - ntfs_mark_rec_free(sbi, rno); + ntfs_mark_rec_free(sbi, rno, is_mft); out: return err; @@ -1076,6 +1109,11 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, if (asize <= free) { attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len, asize, name_off, svcn, ins_le); + if (IS_ERR(attr)) { + err = PTR_ERR(attr); + goto out; + } + if (attr) { if (ins_attr) *ins_attr = attr; @@ -1173,6 +1211,11 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, goto out; } + if (IS_ERR(attr)) { + err = PTR_ERR(attr); + goto out; + } + if (ins_attr) *ins_attr = attr; if (ins_mi) @@ -1218,7 +1261,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) mft_min = mft_new; mi_min = mi_new; } else { - ntfs_mark_rec_free(sbi, mft_new); + ntfs_mark_rec_free(sbi, mft_new, true); mft_new = 0; ni_remove_mi(ni, mi_new); } @@ -1262,7 +1305,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) done = asize - run_size - SIZEOF_NONRESIDENT; le32_sub_cpu(&ni->mi.mrec->used, done); - /* Estimate the size of second part: run_buf=NULL. */ + /* Estimate packed size (run_buf=NULL). */ err = run_pack(run, svcn, evcn + 1 - svcn, NULL, sbi->record_size, &plen); if (err < 0) @@ -1288,10 +1331,16 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) goto out; } + if (IS_ERR(attr)) { + err = PTR_ERR(attr); + goto out; + } + attr->non_res = 1; attr->name_off = SIZEOF_NONRESIDENT_LE; attr->flags = 0; + /* This function can't fail - cause already checked above. */ run_pack(run, svcn, evcn + 1 - svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT), run_size, &plen); @@ -1301,7 +1350,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) out: if (mft_new) { - ntfs_mark_rec_free(sbi, mft_new); + ntfs_mark_rec_free(sbi, mft_new, true); ni_remove_mi(ni, mi_new); } @@ -1367,8 +1416,6 @@ int ni_expand_list(struct ntfs_inode *ni) /* Split MFT data as much as possible. */ err = ni_expand_mft_list(ni); - if (err) - goto out; out: return !err && !done ? -EOPNOTSUPP : err; @@ -1381,7 +1428,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, const struct runs_tree *run, CLST svcn, CLST len, __le16 flags, struct ATTRIB **new_attr, - struct mft_inode **mi) + struct mft_inode **mi, struct ATTR_LIST_ENTRY **le) { int err; CLST plen; @@ -1394,6 +1441,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, u32 run_size, asize; struct ntfs_sb_info *sbi = ni->mi.sbi; + /* Estimate packed size (run_buf=NULL). */ err = run_pack(run, svcn, len, NULL, sbi->max_bytes_per_attr - run_off, &plen); if (err < 0) @@ -1414,7 +1462,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, } err = ni_insert_attr(ni, type, name, name_len, asize, name_off, svcn, - &attr, mi, NULL); + &attr, mi, le); if (err) goto out; @@ -1423,12 +1471,12 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, attr->name_off = cpu_to_le16(name_off); attr->flags = flags; + /* This function can't fail - cause already checked above. */ run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size, &plen); attr->nres.svcn = cpu_to_le64(svcn); attr->nres.evcn = cpu_to_le64((u64)svcn + len - 1); - err = 0; if (new_attr) *new_attr = attr; @@ -1560,7 +1608,7 @@ int ni_delete_all(struct ntfs_inode *ni) mi->dirty = true; mi_write(mi, 0); - ntfs_mark_rec_free(sbi, mi->rno); + ntfs_mark_rec_free(sbi, mi->rno, false); ni_remove_mi(ni, mi); mi_put(mi); node = next; @@ -1571,7 +1619,7 @@ int ni_delete_all(struct ntfs_inode *ni) ni->mi.dirty = true; err = mi_write(&ni->mi, 0); - ntfs_mark_rec_free(sbi, ni->mi.rno); + ntfs_mark_rec_free(sbi, ni->mi.rno, false); return err; } @@ -1589,7 +1637,8 @@ struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, struct ATTRIB *attr = NULL; struct ATTR_FILE_NAME *fname; - *le = NULL; + if (le) + *le = NULL; /* Enumerate all names. */ next: @@ -1605,7 +1654,7 @@ next: goto next; if (!uni) - goto next; + return fname; if (uni->len != fname->name_len) goto next; @@ -2302,10 +2351,8 @@ remove_wof: out: kfree(pages); - if (err) { - make_bad_inode(inode); - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); - } + if (err) + _ntfs_bad_inode(inode); return err; } @@ -2944,7 +2991,7 @@ bool ni_remove_name_undo(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, } /* - * ni_add_name - Add new name in MFT and in directory. + * ni_add_name - Add new name into MFT and into directory. */ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, struct NTFS_DE *de) @@ -2953,13 +3000,20 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; struct mft_inode *mi; + struct ATTR_FILE_NAME *fname; struct ATTR_FILE_NAME *de_name = (struct ATTR_FILE_NAME *)(de + 1); u16 de_key_size = le16_to_cpu(de->key_size); mi_get_ref(&ni->mi, &de->ref); mi_get_ref(&dir_ni->mi, &de_name->home); - /* Insert new name in MFT. */ + /* Fill duplicate from any ATTR_NAME. */ + fname = ni_fname_name(ni, NULL, NULL, NULL, NULL); + if (fname) + memcpy(&de_name->dup, &fname->dup, sizeof(fname->dup)); + de_name->dup.fa = ni->std_fa; + + /* Insert new name into MFT. */ err = ni_insert_resident(ni, de_key_size, ATTR_NAME, NULL, 0, &attr, &mi, &le); if (err) @@ -2967,7 +3021,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de_name, de_key_size); - /* Insert new name in directory. */ + /* Insert new name into directory. */ err = indx_insert_entry(&dir_ni->dir, dir_ni, de, ni->mi.sbi, NULL, 0); if (err) ni_remove_attr_le(ni, attr, mi, le); @@ -2991,7 +3045,7 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, * 1) Add new name and remove old name. * 2) Remove old name and add new name. * - * In most cases (not all!) adding new name in MFT and in directory can + * In most cases (not all!) adding new name into MFT and into directory can * allocate additional cluster(s). * Second way may result to bad inode if we can't add new name * and then can't restore (add) old name. @@ -3261,7 +3315,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) err = err2; if (is_empty) { - ntfs_mark_rec_free(sbi, mi->rno); + ntfs_mark_rec_free(sbi, mi->rno, false); rb_erase(node, &ni->mi_tree); mi_put(mi); } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 49b7df616778..e7c494005122 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -3843,6 +3843,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) memset(&rst_info2, 0, sizeof(struct restart_info)); err = log_read_rst(log, l_size, false, &rst_info2); + if (err) + goto out; /* Determine which restart area to use. */ if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn) @@ -5057,7 +5059,7 @@ undo_action_next: goto add_allocated_vcns; vcn = le64_to_cpu(lrh->target_vcn); - vcn &= ~(log->clst_per_page - 1); + vcn &= ~(u64)(log->clst_per_page - 1); add_allocated_vcns: for (i = 0, vcn = le64_to_cpu(lrh->target_vcn), diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 1835e35199c2..4ed15f64b17f 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -703,12 +703,14 @@ out: /* * ntfs_mark_rec_free - Mark record as free. + * is_mft - true if we are changing MFT */ -void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno) +void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft) { struct wnd_bitmap *wnd = &sbi->mft.bitmap; - down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + if (!is_mft) + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); if (rno >= wnd->nbits) goto out; @@ -727,7 +729,8 @@ void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno) sbi->mft.next_free = rno; out: - up_write(&wnd->rw_lock); + if (!is_mft) + up_write(&wnd->rw_lock); } /* @@ -780,7 +783,7 @@ out: */ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) { - CLST zone_limit, zone_max, lcn, vcn, len; + CLST lcn, vcn, len; size_t lcn_s, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; struct ntfs_inode *ni = sbi->mft.ni; @@ -789,16 +792,6 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) if (wnd_zone_len(wnd)) return 0; - /* - * Compute the MFT zone at two steps. - * It would be nice if we are able to allocate 1/8 of - * total clusters for MFT but not more then 512 MB. - */ - zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits; - zone_max = wnd->nbits >> 3; - if (zone_max > zone_limit) - zone_max = zone_limit; - vcn = bytes_to_cluster(sbi, (u64)sbi->mft.bitmap.nbits << sbi->record_bits); @@ -812,13 +805,7 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) lcn_s = lcn + 1; /* Try to allocate clusters after last MFT run. */ - zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s); - if (!zlen) { - ntfs_notice(sbi->sb, "MftZone: unavailable"); - return 0; - } - - /* Truncate too large zone. */ + zlen = wnd_find(wnd, sbi->zone_max, lcn_s, 0, &lcn_s); wnd_zone_set(wnd, lcn_s, zlen); return 0; @@ -827,16 +814,21 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) /* * ntfs_update_mftmirr - Update $MFTMirr data. */ -int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) { int err; struct super_block *sb = sbi->sb; - u32 blocksize = sb->s_blocksize; + u32 blocksize; sector_t block1, block2; u32 bytes; + if (!sb) + return; + + blocksize = sb->s_blocksize; + if (!(sbi->flags & NTFS_FLAGS_MFTMIRR)) - return 0; + return; err = 0; bytes = sbi->mft.recs_mirr << sbi->record_bits; @@ -847,16 +839,13 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) struct buffer_head *bh1, *bh2; bh1 = sb_bread(sb, block1++); - if (!bh1) { - err = -EIO; - goto out; - } + if (!bh1) + return; bh2 = sb_getblk(sb, block2++); if (!bh2) { put_bh(bh1); - err = -EIO; - goto out; + return; } if (buffer_locked(bh2)) @@ -876,13 +865,24 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) put_bh(bh2); if (err) - goto out; + return; } sbi->flags &= ~NTFS_FLAGS_MFTMIRR; +} -out: - return err; +/* + * ntfs_bad_inode + * + * Marks inode as bad and marks fs as 'dirty' + */ +void ntfs_bad_inode(struct inode *inode, const char *hint) +{ + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + + ntfs_inode_err(inode, "%s", hint); + make_bad_inode(inode); + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* @@ -1395,7 +1395,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, if (buffer_locked(bh)) __wait_on_buffer(bh); - lock_buffer(nb->bh[idx]); + lock_buffer(bh); bh_data = bh->b_data + off; end_data = Add2Ptr(bh_data, op); @@ -2424,7 +2424,7 @@ static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn, void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { - CLST end, i; + CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); @@ -2459,6 +2459,28 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) ntfs_unmap_and_discard(sbi, lcn, len); wnd_set_free(wnd, lcn, len); + /* append to MFT zone, if possible. */ + zone_len = wnd_zone_len(wnd); + zlen = min(zone_len + len, sbi->zone_max); + + if (zlen == zone_len) { + /* MFT zone already has maximum size. */ + } else if (!zone_len) { + /* Create MFT zone only if 'zlen' is large enough. */ + if (zlen == sbi->zone_max) + wnd_zone_set(wnd, lcn, zlen); + } else { + CLST zone_lcn = wnd_zone_bit(wnd); + + if (lcn + len == zone_lcn) { + /* Append into head MFT zone. */ + wnd_zone_set(wnd, lcn, zlen); + } else if (zone_lcn + zone_len == lcn) { + /* Append into tail MFT zone. */ + wnd_zone_set(wnd, zone_lcn, zlen); + } + } + out: up_write(&wnd->rw_lock); } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 6f81e3a49abf..440328147e7e 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1042,19 +1042,16 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, { int err; struct NTFS_DE *e; - const struct INDEX_HDR *hdr; struct indx_node *node; if (!root) root = indx_get_root(&ni->dir, ni, NULL, NULL); if (!root) { - err = -EINVAL; - goto out; + /* Should not happen. */ + return -EINVAL; } - hdr = &root->ihdr; - /* Check cache. */ e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de; if (e && !de_is_last(e) && @@ -1068,39 +1065,35 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, fnd_clear(fnd); /* Lookup entry that is <= to the search value. */ - e = hdr_find_e(indx, hdr, key, key_len, ctx, diff); + e = hdr_find_e(indx, &root->ihdr, key, key_len, ctx, diff); if (!e) return -EINVAL; fnd->root_de = e; - err = 0; for (;;) { node = NULL; - if (*diff >= 0 || !de_has_vcn_ex(e)) { - *entry = e; - goto out; - } + if (*diff >= 0 || !de_has_vcn_ex(e)) + break; /* Read next level. */ err = indx_read(indx, ni, de_get_vbn(e), &node); if (err) - goto out; + return err; /* Lookup entry that is <= to the search value. */ e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx, diff); if (!e) { - err = -EINVAL; put_indx_node(node); - goto out; + return -EINVAL; } fnd_push(fnd, node, e); } -out: - return err; + *entry = e; + return 0; } int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni, @@ -1354,7 +1347,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out; err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len, - &run, 0, len, 0, &alloc, NULL); + &run, 0, len, 0, &alloc, NULL, NULL); if (err) goto out1; @@ -1685,8 +1678,8 @@ indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni, { int err; const struct NTFS_DE *sp; - struct NTFS_DE *e, *de_t, *up_e = NULL; - struct indx_node *n2 = NULL; + struct NTFS_DE *e, *de_t, *up_e; + struct indx_node *n2; struct indx_node *n1 = fnd->nodes[level]; struct INDEX_HDR *hdr1 = &n1->index->ihdr; struct INDEX_HDR *hdr2; @@ -1994,7 +1987,7 @@ static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *e, bool trim) { int err; - struct indx_node *n; + struct indx_node *n = NULL; struct INDEX_HDR *hdr; CLST vbn = de_get_vbn(e); size_t i; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 80104afeb2cd..51363d4e8636 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -430,6 +430,7 @@ end_enum: } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) && fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) { /* Records in $Extend are not a files or general directories. */ + inode->i_op = &ntfs_file_inode_operations; } else { err = -EINVAL; goto out; @@ -500,7 +501,7 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, inode = ntfs_read_mft(inode, name, ref); else if (ref->seq != ntfs_i(inode)->mi.mrec->seq) { /* Inode overlaps? */ - make_bad_inode(inode); + _ntfs_bad_inode(inode); } return inode; @@ -1632,7 +1633,7 @@ out4: ni->mi.dirty = false; discard_new_inode(inode); out3: - ntfs_mark_rec_free(sbi, ino); + ntfs_mark_rec_free(sbi, ino, false); out2: __putname(new_de); @@ -1655,7 +1656,6 @@ int ntfs_link_inode(struct inode *inode, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(inode); struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; struct NTFS_DE *de; - struct ATTR_FILE_NAME *de_name; /* Allocate PATH_MAX bytes. */ de = __getname(); @@ -1670,15 +1670,6 @@ int ntfs_link_inode(struct inode *inode, struct dentry *dentry) if (err) goto out; - de_name = (struct ATTR_FILE_NAME *)(de + 1); - /* Fill duplicate info. */ - de_name->dup.cr_time = de_name->dup.m_time = de_name->dup.c_time = - de_name->dup.a_time = kernel2nt(&inode->i_ctime); - de_name->dup.alloc_size = de_name->dup.data_size = - cpu_to_le64(inode->i_size); - de_name->dup.fa = ni->std_fa; - de_name->dup.ea_size = de_name->dup.reparse = 0; - err = ni_add_name(ntfs_i(d_inode(dentry->d_parent)), ni, de); out: __putname(de); @@ -1731,9 +1722,7 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry) if (inode->i_nlink) mark_inode_dirty(inode); } else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) { - make_bad_inode(inode); - ntfs_inode_err(inode, "failed to undo unlink"); - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(inode); } else { if (ni_is_dirty(dir)) mark_inode_dirty(dir); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index bc741213ad84..bc22cc321a74 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -208,7 +208,7 @@ static int ntfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, } /* - * ntfs_rmdir - inode_operations::rm_dir + * ntfs_rmdir - inode_operations::rmdir */ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) { @@ -308,9 +308,7 @@ static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir, err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad); if (is_bad) { /* Restore after failed rename failed too. */ - make_bad_inode(inode); - ntfs_inode_err(inode, "failed to undo rename"); - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(inode); } else if (!err) { inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8dbdca03e1af..2c791222c4e2 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -220,6 +220,7 @@ struct ntfs_sb_info { u32 flags; // See NTFS_FLAGS_XXX. + CLST zone_max; // Maximum MFT zone length in clusters CLST bad_clusters; // The count of marked bad clusters. u16 max_bytes_per_attr; // Maximum attribute size in record. @@ -408,8 +409,6 @@ enum REPARSE_SIGN { }; /* Functions from attrib.c */ -int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, - struct runs_tree *run, const CLST *vcn); int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, @@ -440,6 +439,7 @@ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, u64 new_valid); int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes); +int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes); int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size); /* Functions from attrlist.c */ @@ -528,7 +528,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, const struct runs_tree *run, CLST svcn, CLST len, __le16 flags, struct ATTRIB **new_attr, - struct mft_inode **mi); + struct mft_inode **mi, struct ATTR_LIST_ENTRY **le); int ni_insert_resident(struct ntfs_inode *ni, u32 data_size, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct ATTRIB **new_attr, struct mft_inode **mi, @@ -589,10 +589,12 @@ int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, enum ALLOCATE_OPT opt); int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi); -void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno); +void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft); int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to); int ntfs_refresh_zone(struct ntfs_sb_info *sbi); -int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait); +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait); +void ntfs_bad_inode(struct inode *inode, const char *hint); +#define _ntfs_bad_inode(i) ntfs_bad_inode(i, __func__) enum NTFS_DIRTY_FLAGS { NTFS_DIRTY_CLEAR = 0, NTFS_DIRTY_DIRTY = 1, @@ -738,7 +740,6 @@ static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec, int mi_write(struct mft_inode *mi, int wait); int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, __le16 flags, bool is_mft); -void mi_mark_free(struct mft_inode *mi); struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, const __le16 *name, u8 name_len, u32 asize, u16 name_off); @@ -780,10 +781,10 @@ bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn, void run_truncate(struct runs_tree *run, CLST vcn); void run_truncate_head(struct runs_tree *run, CLST vcn); void run_truncate_around(struct runs_tree *run, CLST vcn); -bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *Index); bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len, bool is_mft); bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len); +bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len); bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn, CLST *lcn, CLST *len); bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn); @@ -802,6 +803,7 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, #define run_unpack_ex run_unpack #endif int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn); +int run_clone(const struct runs_tree *run, struct runs_tree *new_run); /* Globals from super.c */ void *ntfs_set_shared(void *ptr, u32 bytes); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 861e35791506..7d2fac5ee215 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -395,28 +395,6 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, } /* - * mi_mark_free - Mark record as unused and marks it as free in bitmap. - */ -void mi_mark_free(struct mft_inode *mi) -{ - CLST rno = mi->rno; - struct ntfs_sb_info *sbi = mi->sbi; - - if (rno >= MFT_REC_RESERVED && rno < MFT_REC_FREE) { - ntfs_clear_mft_tail(sbi, rno, rno + 1); - mi->dirty = false; - return; - } - - if (mi->mrec) { - clear_rec_inuse(mi->mrec); - mi->dirty = true; - mi_write(mi, 0); - } - ntfs_mark_rec_free(sbi, rno); -} - -/* * mi_insert_attr - Reserve space for new attribute. * * Return: Not full constructed attribute or NULL if not possible to create. @@ -445,12 +423,11 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, attr = NULL; while ((attr = mi_enum_attr(mi, attr))) { diff = compare_attr(attr, type, name, name_len, upcase); - if (diff > 0) - break; + if (diff < 0) continue; - if (!is_attr_indexed(attr)) + if (!diff && !is_attr_indexed(attr)) return NULL; break; } diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index a8fec651f973..aaaa0d3d35a2 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -31,7 +31,7 @@ struct ntfs_run { * Case of entry missing from list 'index' will be set to * point to insertion position for the entry question. */ -bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index) +static bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index) { size_t min_idx, max_idx, mid_idx; struct ntfs_run *r; @@ -547,6 +547,48 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len) return true; } +/* run_insert_range + * + * Helper for attr_insert_range(), + * which is helper for fallocate(insert_range). + */ +bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len) +{ + size_t index; + struct ntfs_run *r, *e; + + if (WARN_ON(!run_lookup(run, vcn, &index))) + return false; /* Should never be here. */ + + e = run->runs + run->count; + r = run->runs + index; + + if (vcn > r->vcn) + r += 1; + + for (; r < e; r++) + r->vcn += len; + + r = run->runs + index; + + if (vcn > r->vcn) { + /* split fragment. */ + CLST len1 = vcn - r->vcn; + CLST len2 = r->len - len1; + CLST lcn2 = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len1); + + r->len = len1; + + if (!run_add_entry(run, vcn + len, lcn2, len2, false)) + return false; + } + + if (!run_add_entry(run, vcn, SPARSE_LCN, len, false)) + return false; + + return true; +} + /* * run_get_entry - Return index-th mapped region. */ @@ -778,26 +820,36 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, CLST next_vcn, vcn, lcn; CLST prev_lcn = 0; CLST evcn1 = svcn + len; + const struct ntfs_run *r, *r_end; int packed_size = 0; size_t i; - bool ok; s64 dlcn; int offset_size, size_size, tmp; - next_vcn = vcn = svcn; - *packed_vcns = 0; if (!len) goto out; - ok = run_lookup_entry(run, vcn, &lcn, &len, &i); + /* Check all required entries [svcn, encv1) available. */ + if (!run_lookup(run, svcn, &i)) + return -ENOENT; + + r_end = run->runs + run->count; + r = run->runs + i; - if (!ok) - goto error; + for (next_vcn = r->vcn + r->len; next_vcn < evcn1; + next_vcn = r->vcn + r->len) { + if (++r >= r_end || r->vcn != next_vcn) + return -ENOENT; + } - if (next_vcn != vcn) - goto error; + /* Repeat cycle above and pack runs. Assume no errors. */ + r = run->runs + i; + len = svcn - r->vcn; + vcn = svcn; + lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len); + len = r->len - len; for (;;) { next_vcn = vcn + len; @@ -846,12 +898,10 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, if (packed_size + 1 >= run_buf_size || next_vcn >= evcn1) goto out; - ok = run_get_entry(run, ++i, &vcn, &lcn, &len); - if (!ok) - goto error; - - if (next_vcn != vcn) - goto error; + r += 1; + vcn = r->vcn; + lcn = r->lcn; + len = r->len; } out: @@ -860,9 +910,6 @@ out: run_buf[0] = 0; return packed_size + 1; - -error: - return -EOPNOTSUPP; } /* @@ -1109,3 +1156,28 @@ int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn) *highest_vcn = vcn64 - 1; return 0; } + +/* + * run_clone + * + * Make a copy of run + */ +int run_clone(const struct runs_tree *run, struct runs_tree *new_run) +{ + size_t bytes = run->count * sizeof(struct ntfs_run); + + if (bytes > new_run->allocated) { + struct ntfs_run *new_ptr = kvmalloc(bytes, GFP_KERNEL); + + if (!new_ptr) + return -ENOMEM; + + kvfree(new_run->runs); + new_run->runs = new_ptr; + new_run->allocated = bytes; + } + + memcpy(new_run->runs, run->runs, bytes); + new_run->count = run->count; + return 0; +} diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 0c6de6287737..47012c9bf505 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -30,6 +30,7 @@ #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/log2.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/nls.h> #include <linux/seq_file.h> @@ -390,7 +391,7 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) return -EINVAL; } - memcpy(sbi->options, new_opts, sizeof(*new_opts)); + swap(sbi->options, fc->fs_private); return 0; } @@ -870,6 +871,13 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits; #endif + /* + * Compute the MFT zone at two steps. + * It would be nice if we are able to allocate 1/8 of + * total clusters for MFT but not more then 512 MB. + */ + sbi->zone_max = min_t(CLST, 0x20000000 >> sbi->cluster_bits, clusters >> 3); + err = 0; out: @@ -900,6 +908,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) ref.high = 0; sbi->sb = sb; + sbi->options = fc->fs_private; + fc->fs_private = NULL; sb->s_flags |= SB_NODIRATIME; sb->s_magic = 0x7366746e; // "ntfs" sb->s_op = &ntfs_sops; @@ -1262,8 +1272,6 @@ load_root: goto put_inode_out; } - fc->fs_private = NULL; - return 0; put_inode_out: @@ -1378,7 +1386,7 @@ static const struct fs_context_operations ntfs_context_ops = { /* * ntfs_init_fs_context - Initialize spi and opts * - * This will called when mount/remount. We will first initiliaze + * This will called when mount/remount. We will first initialize * options so that if remount we can use just that. */ static int ntfs_init_fs_context(struct fs_context *fc) @@ -1416,7 +1424,6 @@ static int ntfs_init_fs_context(struct fs_context *fc) mutex_init(&sbi->compress.mtx_lzx); #endif - sbi->options = opts; fc->s_fs_info = sbi; ok: fc->fs_private = opts; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 5e0e0280e70d..5bdff12a1232 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -118,7 +118,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, run_init(&run); - err = attr_load_runs(attr_ea, ni, &run, NULL); + err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size); if (!err) err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL); run_close(&run); @@ -444,6 +444,11 @@ update_ea: /* Delete xattr, ATTR_EA */ ni_remove_attr_le(ni, attr, mi, le); } else if (attr->non_res) { + err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0, + size); + if (err) + goto out; + err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0); if (err) goto out; @@ -547,28 +552,23 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, { const char *name; size_t size, name_len; - void *value = NULL; - int err = 0; + void *value; + int err; int flags; + umode_t mode; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; + mode = inode->i_mode; switch (type) { case ACL_TYPE_ACCESS: /* Do not change i_mode if we are in init_acl */ if (acl && !init_acl) { - umode_t mode; - err = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); if (err) - goto out; - - if (inode->i_mode != mode) { - inode->i_mode = mode; - mark_inode_dirty(inode); - } + return err; } name = XATTR_NAME_POSIX_ACL_ACCESS; name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; @@ -604,8 +604,13 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); if (err == -ENODATA && !size) err = 0; /* Removing non existed xattr. */ - if (!err) + if (!err) { set_cached_acl(inode, type, acl); + if (inode->i_mode != mode) { + inode->i_mode = mode; + mark_inode_dirty(inode); + } + } out: kfree(value); @@ -706,13 +711,13 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, inode->i_default_acl = NULL; } - if (!acl) - inode->i_acl = NULL; - else { + if (acl) { if (!err) err = ntfs_set_acl_ex(mnt_userns, inode, acl, ACL_TYPE_ACCESS, true); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return err; diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 3096f086b5a3..71ab4ba9c25d 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -39,9 +39,6 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - if (READ_ONCE(*p) & mask) - return 1; - old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); return !!(old & mask); } @@ -53,9 +50,6 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - if (!(READ_ONCE(*p) & mask)) - return 0; - old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); return !!(old & mask); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index effee1dc715a..92294a5fb083 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,7 +857,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); -bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 0d435d0edbcb..bd047864c7ac 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -202,12 +202,13 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) return 0; } -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) { +static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) +{ return cpumask_first_and(src1p, src2p); } -static inline int cpumask_any_distribute(const struct cpumask *srcp) +static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -261,7 +262,26 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +#if NR_CPUS == 1 +static inline +unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + cpumask_check(start); + if (n != -1) + cpumask_check(n); + + /* + * Return the first available CPU when wrapping, or when starting before cpu0, + * since there is only one valid option. + */ + if (wrap && n >= 0) + return nr_cpumask_bits; + + return cpumask_first(mask); +} +#else unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +#endif /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c480b1821e1..f4519d3689e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -656,12 +656,12 @@ struct kvm_irq_routing_table { }; #endif -#ifndef KVM_PRIVATE_MEM_SLOTS -#define KVM_PRIVATE_MEM_SLOTS 0 +#ifndef KVM_INTERNAL_MEM_SLOTS +#define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX -#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) @@ -765,10 +765,10 @@ struct kvm { #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) struct mmu_notifier mmu_notifier; - unsigned long mmu_notifier_seq; - long mmu_notifier_count; - unsigned long mmu_notifier_range_start; - unsigned long mmu_notifier_range_end; + unsigned long mmu_invalidate_seq; + long mmu_invalidate_in_progress; + unsigned long mmu_invalidate_range_start; + unsigned long mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -1357,10 +1357,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end); +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1907,42 +1907,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) +static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* - * Ensure the read of mmu_notifier_count happens before the read - * of mmu_notifier_seq. This interacts with the smp_wmb() in - * mmu_notifier_invalidate_range_end to make sure that the caller - * either sees the old (non-zero) value of mmu_notifier_count or - * the new (incremented) value of mmu_notifier_seq. - * PowerPC Book3s HV KVM calls this under a per-page lock - * rather than under kvm->mmu_lock, for scalability, so - * can't rely on kvm->mmu_lock to keep things ordered. + * Ensure the read of mmu_invalidate_in_progress happens before + * the read of mmu_invalidate_seq. This interacts with the + * smp_wmb() in mmu_notifier_invalidate_range_end to make sure + * that the caller either sees the old (non-zero) value of + * mmu_invalidate_in_progress or the new (incremented) value of + * mmu_invalidate_seq. + * + * PowerPC Book3s HV KVM calls this under a per-page lock rather + * than under kvm->mmu_lock, for scalability, so can't rely on + * kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } -static inline int mmu_notifier_retry_hva(struct kvm *kvm, - unsigned long mmu_seq, - unsigned long hva) +static inline int mmu_invalidate_retry_hva(struct kvm *kvm, + unsigned long mmu_seq, + unsigned long hva) { lockdep_assert_held(&kvm->mmu_lock); /* - * If mmu_notifier_count is non-zero, then the range maintained by - * kvm_mmu_notifier_invalidate_range_start contains all addresses that - * might be being invalidated. Note that it may include some false + * If mmu_invalidate_in_progress is non-zero, then the range maintained + * by kvm_mmu_notifier_invalidate_range_start contains all addresses + * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_notifier_count) && - hva >= kvm->mmu_notifier_range_start && - hva < kvm->mmu_notifier_range_end) + if (unlikely(kvm->mmu_invalidate_in_progress) && + hva >= kvm->mmu_invalidate_range_start && + hva < kvm->mmu_invalidate_range_end) return 1; - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 0269ff114f5a..698032e5ef2d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1382,7 +1382,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .proc_name = drv_name, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ - .unlock_native_capacity = ata_scsi_unlock_native_capacity + .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ + .max_sectors = ATA_MAX_SECTORS_LBA48 #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index a3f73bb6733e..dcab9c7e8784 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -11,7 +11,7 @@ #include <linux/gfp.h> /** - * virtqueue - a queue to register buffers for sending or receiving. + * struct virtqueue - a queue to register buffers for sending or receiving. * @list: the chain of virtqueues for this device * @callback: the function to call when buffers are consumed (can be NULL). * @name: the name of this virtqueue (mainly for debugging) @@ -97,7 +97,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); /** - * virtio_device - representation of a device using virtio + * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_enabled: configuration change reporting enabled @@ -156,7 +156,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); list_for_each_entry(vq, &vdev->vqs, list) /** - * virtio_driver - operations for a virtio I/O driver + * struct virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 6adff09f7170..4b517649cfe8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,7 +55,6 @@ struct virtio_shm_region { * include a NULL entry for vqs that do not need a callback * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver - * sizes: array of virtqueue sizes * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) @@ -104,9 +103,7 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); @@ -215,7 +212,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, const char *names[] = { n }; struct virtqueue *vq; int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL, NULL); + NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -227,8 +224,7 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); } static inline @@ -237,25 +233,13 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - ctx, desc); -} - -static inline -int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, struct irq_affinity *desc) -{ - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, - ctx, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); } /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks - * @vdev: the device + * @dev: the virtio device */ static inline void virtio_synchronize_cbs(struct virtio_device *dev) @@ -274,7 +258,7 @@ void virtio_synchronize_cbs(struct virtio_device *dev) /** * virtio_device_ready - enable vq use in probe function - * @vdev: the device + * @dev: the virtio device * * Driver must call this to use vqs in the probe function. * @@ -322,7 +306,7 @@ const char *virtio_bus_name(struct virtio_device *vdev) /** * virtqueue_set_affinity - setting affinity for a virtqueue * @vq: the virtqueue - * @cpu: the cpu no. + * @cpu_mask: the cpu mask * * Pay attention the function are best-effort: the affinity hint may not be set * due to config support, irq type and sharing. diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9f0bab0589d9..3827a6b395fd 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,6 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; + int qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0677cd3de034..c396a3862e80 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,7 +95,7 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS - bool ctnetlink_has_listener; + u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ diff --git a/include/net/sock.h b/include/net/sock.h index 05a1bbdf5805..d08cfe190a78 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -578,6 +578,31 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) /** + * __locked_read_sk_user_data_with_flags - return the pointer + * only if argument flags all has been set in sk_user_data. Otherwise + * return NULL + * + * @sk: socket + * @flags: flag bits + * + * The caller must be holding sk->sk_callback_lock. + */ +static inline void * +__locked_read_sk_user_data_with_flags(const struct sock *sk, + uintptr_t flags) +{ + uintptr_t sk_user_data = + (uintptr_t)rcu_dereference_check(__sk_user_data(sk), + lockdep_is_held(&sk->sk_callback_lock)); + + WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); + + if ((sk_user_data & flags) == flags) + return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); + return NULL; +} + +/** * __rcu_dereference_sk_user_data_with_flags - return the pointer * only if argument flags all has been set in sk_user_data. Otherwise * return NULL diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ac151ecc7f19..2edea901bbd5 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -105,11 +105,6 @@ #define REG_RESERVED_ADDR 0xffffffff #define REG_RESERVED(reg) REG(reg, REG_RESERVED_ADDR) -#define for_each_stat(ocelot, stat) \ - for ((stat) = (ocelot)->stats_layout; \ - ((stat)->name[0] != '\0'); \ - (stat)++) - enum ocelot_target { ANA = 1, QS, @@ -335,13 +330,38 @@ enum ocelot_reg { SYS_COUNT_RX_64, SYS_COUNT_RX_65_127, SYS_COUNT_RX_128_255, - SYS_COUNT_RX_256_1023, + SYS_COUNT_RX_256_511, + SYS_COUNT_RX_512_1023, SYS_COUNT_RX_1024_1526, SYS_COUNT_RX_1527_MAX, SYS_COUNT_RX_PAUSE, SYS_COUNT_RX_CONTROL, SYS_COUNT_RX_LONGS, SYS_COUNT_RX_CLASSIFIED_DROPS, + SYS_COUNT_RX_RED_PRIO_0, + SYS_COUNT_RX_RED_PRIO_1, + SYS_COUNT_RX_RED_PRIO_2, + SYS_COUNT_RX_RED_PRIO_3, + SYS_COUNT_RX_RED_PRIO_4, + SYS_COUNT_RX_RED_PRIO_5, + SYS_COUNT_RX_RED_PRIO_6, + SYS_COUNT_RX_RED_PRIO_7, + SYS_COUNT_RX_YELLOW_PRIO_0, + SYS_COUNT_RX_YELLOW_PRIO_1, + SYS_COUNT_RX_YELLOW_PRIO_2, + SYS_COUNT_RX_YELLOW_PRIO_3, + SYS_COUNT_RX_YELLOW_PRIO_4, + SYS_COUNT_RX_YELLOW_PRIO_5, + SYS_COUNT_RX_YELLOW_PRIO_6, + SYS_COUNT_RX_YELLOW_PRIO_7, + SYS_COUNT_RX_GREEN_PRIO_0, + SYS_COUNT_RX_GREEN_PRIO_1, + SYS_COUNT_RX_GREEN_PRIO_2, + SYS_COUNT_RX_GREEN_PRIO_3, + SYS_COUNT_RX_GREEN_PRIO_4, + SYS_COUNT_RX_GREEN_PRIO_5, + SYS_COUNT_RX_GREEN_PRIO_6, + SYS_COUNT_RX_GREEN_PRIO_7, SYS_COUNT_TX_OCTETS, SYS_COUNT_TX_UNICAST, SYS_COUNT_TX_MULTICAST, @@ -351,11 +371,46 @@ enum ocelot_reg { SYS_COUNT_TX_PAUSE, SYS_COUNT_TX_64, SYS_COUNT_TX_65_127, - SYS_COUNT_TX_128_511, + SYS_COUNT_TX_128_255, + SYS_COUNT_TX_256_511, SYS_COUNT_TX_512_1023, SYS_COUNT_TX_1024_1526, SYS_COUNT_TX_1527_MAX, + SYS_COUNT_TX_YELLOW_PRIO_0, + SYS_COUNT_TX_YELLOW_PRIO_1, + SYS_COUNT_TX_YELLOW_PRIO_2, + SYS_COUNT_TX_YELLOW_PRIO_3, + SYS_COUNT_TX_YELLOW_PRIO_4, + SYS_COUNT_TX_YELLOW_PRIO_5, + SYS_COUNT_TX_YELLOW_PRIO_6, + SYS_COUNT_TX_YELLOW_PRIO_7, + SYS_COUNT_TX_GREEN_PRIO_0, + SYS_COUNT_TX_GREEN_PRIO_1, + SYS_COUNT_TX_GREEN_PRIO_2, + SYS_COUNT_TX_GREEN_PRIO_3, + SYS_COUNT_TX_GREEN_PRIO_4, + SYS_COUNT_TX_GREEN_PRIO_5, + SYS_COUNT_TX_GREEN_PRIO_6, + SYS_COUNT_TX_GREEN_PRIO_7, SYS_COUNT_TX_AGING, + SYS_COUNT_DROP_LOCAL, + SYS_COUNT_DROP_TAIL, + SYS_COUNT_DROP_YELLOW_PRIO_0, + SYS_COUNT_DROP_YELLOW_PRIO_1, + SYS_COUNT_DROP_YELLOW_PRIO_2, + SYS_COUNT_DROP_YELLOW_PRIO_3, + SYS_COUNT_DROP_YELLOW_PRIO_4, + SYS_COUNT_DROP_YELLOW_PRIO_5, + SYS_COUNT_DROP_YELLOW_PRIO_6, + SYS_COUNT_DROP_YELLOW_PRIO_7, + SYS_COUNT_DROP_GREEN_PRIO_0, + SYS_COUNT_DROP_GREEN_PRIO_1, + SYS_COUNT_DROP_GREEN_PRIO_2, + SYS_COUNT_DROP_GREEN_PRIO_3, + SYS_COUNT_DROP_GREEN_PRIO_4, + SYS_COUNT_DROP_GREEN_PRIO_5, + SYS_COUNT_DROP_GREEN_PRIO_6, + SYS_COUNT_DROP_GREEN_PRIO_7, SYS_RESET_CFG, SYS_SR_ETYPE_CFG, SYS_VLAN_ETYPE_CFG, @@ -538,16 +593,111 @@ enum ocelot_ptp_pins { TOD_ACC_PIN }; +enum ocelot_stat { + OCELOT_STAT_RX_OCTETS, + OCELOT_STAT_RX_UNICAST, + OCELOT_STAT_RX_MULTICAST, + OCELOT_STAT_RX_BROADCAST, + OCELOT_STAT_RX_SHORTS, + OCELOT_STAT_RX_FRAGMENTS, + OCELOT_STAT_RX_JABBERS, + OCELOT_STAT_RX_CRC_ALIGN_ERRS, + OCELOT_STAT_RX_SYM_ERRS, + OCELOT_STAT_RX_64, + OCELOT_STAT_RX_65_127, + OCELOT_STAT_RX_128_255, + OCELOT_STAT_RX_256_511, + OCELOT_STAT_RX_512_1023, + OCELOT_STAT_RX_1024_1526, + OCELOT_STAT_RX_1527_MAX, + OCELOT_STAT_RX_PAUSE, + OCELOT_STAT_RX_CONTROL, + OCELOT_STAT_RX_LONGS, + OCELOT_STAT_RX_CLASSIFIED_DROPS, + OCELOT_STAT_RX_RED_PRIO_0, + OCELOT_STAT_RX_RED_PRIO_1, + OCELOT_STAT_RX_RED_PRIO_2, + OCELOT_STAT_RX_RED_PRIO_3, + OCELOT_STAT_RX_RED_PRIO_4, + OCELOT_STAT_RX_RED_PRIO_5, + OCELOT_STAT_RX_RED_PRIO_6, + OCELOT_STAT_RX_RED_PRIO_7, + OCELOT_STAT_RX_YELLOW_PRIO_0, + OCELOT_STAT_RX_YELLOW_PRIO_1, + OCELOT_STAT_RX_YELLOW_PRIO_2, + OCELOT_STAT_RX_YELLOW_PRIO_3, + OCELOT_STAT_RX_YELLOW_PRIO_4, + OCELOT_STAT_RX_YELLOW_PRIO_5, + OCELOT_STAT_RX_YELLOW_PRIO_6, + OCELOT_STAT_RX_YELLOW_PRIO_7, + OCELOT_STAT_RX_GREEN_PRIO_0, + OCELOT_STAT_RX_GREEN_PRIO_1, + OCELOT_STAT_RX_GREEN_PRIO_2, + OCELOT_STAT_RX_GREEN_PRIO_3, + OCELOT_STAT_RX_GREEN_PRIO_4, + OCELOT_STAT_RX_GREEN_PRIO_5, + OCELOT_STAT_RX_GREEN_PRIO_6, + OCELOT_STAT_RX_GREEN_PRIO_7, + OCELOT_STAT_TX_OCTETS, + OCELOT_STAT_TX_UNICAST, + OCELOT_STAT_TX_MULTICAST, + OCELOT_STAT_TX_BROADCAST, + OCELOT_STAT_TX_COLLISION, + OCELOT_STAT_TX_DROPS, + OCELOT_STAT_TX_PAUSE, + OCELOT_STAT_TX_64, + OCELOT_STAT_TX_65_127, + OCELOT_STAT_TX_128_255, + OCELOT_STAT_TX_256_511, + OCELOT_STAT_TX_512_1023, + OCELOT_STAT_TX_1024_1526, + OCELOT_STAT_TX_1527_MAX, + OCELOT_STAT_TX_YELLOW_PRIO_0, + OCELOT_STAT_TX_YELLOW_PRIO_1, + OCELOT_STAT_TX_YELLOW_PRIO_2, + OCELOT_STAT_TX_YELLOW_PRIO_3, + OCELOT_STAT_TX_YELLOW_PRIO_4, + OCELOT_STAT_TX_YELLOW_PRIO_5, + OCELOT_STAT_TX_YELLOW_PRIO_6, + OCELOT_STAT_TX_YELLOW_PRIO_7, + OCELOT_STAT_TX_GREEN_PRIO_0, + OCELOT_STAT_TX_GREEN_PRIO_1, + OCELOT_STAT_TX_GREEN_PRIO_2, + OCELOT_STAT_TX_GREEN_PRIO_3, + OCELOT_STAT_TX_GREEN_PRIO_4, + OCELOT_STAT_TX_GREEN_PRIO_5, + OCELOT_STAT_TX_GREEN_PRIO_6, + OCELOT_STAT_TX_GREEN_PRIO_7, + OCELOT_STAT_TX_AGED, + OCELOT_STAT_DROP_LOCAL, + OCELOT_STAT_DROP_TAIL, + OCELOT_STAT_DROP_YELLOW_PRIO_0, + OCELOT_STAT_DROP_YELLOW_PRIO_1, + OCELOT_STAT_DROP_YELLOW_PRIO_2, + OCELOT_STAT_DROP_YELLOW_PRIO_3, + OCELOT_STAT_DROP_YELLOW_PRIO_4, + OCELOT_STAT_DROP_YELLOW_PRIO_5, + OCELOT_STAT_DROP_YELLOW_PRIO_6, + OCELOT_STAT_DROP_YELLOW_PRIO_7, + OCELOT_STAT_DROP_GREEN_PRIO_0, + OCELOT_STAT_DROP_GREEN_PRIO_1, + OCELOT_STAT_DROP_GREEN_PRIO_2, + OCELOT_STAT_DROP_GREEN_PRIO_3, + OCELOT_STAT_DROP_GREEN_PRIO_4, + OCELOT_STAT_DROP_GREEN_PRIO_5, + OCELOT_STAT_DROP_GREEN_PRIO_6, + OCELOT_STAT_DROP_GREEN_PRIO_7, + OCELOT_NUM_STATS, +}; + struct ocelot_stat_layout { - u32 offset; + u32 reg; char name[ETH_GSTRING_LEN]; }; -#define OCELOT_STAT_END { .name = "" } - struct ocelot_stats_region { struct list_head node; - u32 offset; + u32 base; int count; u32 *buf; }; @@ -707,7 +857,6 @@ struct ocelot { const u32 *const *map; const struct ocelot_stat_layout *stats_layout; struct list_head stats_regions; - unsigned int num_stats; u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; @@ -750,7 +899,7 @@ struct ocelot { struct ocelot_psfp_list psfp; /* Workqueue to check statistics for overflow with its lock */ - struct mutex stats_lock; + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; @@ -786,8 +935,8 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) \ - __ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count) +#define ocelot_bulk_read(ocelot, reg, buf, count) \ + __ocelot_bulk_read_ix(ocelot, reg, 0, buf, count) #define ocelot_read_ix(ocelot, reg, gi, ri) \ __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 476d3e5c0fe7..f8c20d3de8da 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -93,15 +93,21 @@ #define VRING_USED_ALIGN_SIZE 4 #define VRING_DESC_ALIGN_SIZE 16 -/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +/** + * struct vring_desc - Virtio ring descriptors, + * 16 bytes long. These can chain together via @next. + * + * @addr: buffer address (guest-physical) + * @len: buffer length + * @flags: descriptor flags + * @next: index of the next descriptor in the chain, + * if the VRING_DESC_F_NEXT flag is set. We chain unused + * descriptors via this, too. + */ struct vring_desc { - /* Address (guest-physical). */ __virtio64 addr; - /* Length. */ __virtio32 len; - /* The flags as indicated above. */ __virtio16 flags; - /* We chain unused descriptors via this, too */ __virtio16 next; }; diff --git a/init/Kconfig b/init/Kconfig index 80fe60fa77fb..532362fcfe31 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -70,11 +70,7 @@ config CC_CAN_LINK_STATIC default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) -config CC_HAS_ASM_GOTO - def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) - config CC_HAS_ASM_GOTO_OUTPUT - depends on CC_HAS_ASM_GOTO def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) config CC_HAS_ASM_GOTO_TIED_OUTPUT diff --git a/io_uring/net.c b/io_uring/net.c index 6d71748e2c5a..f8cdf1dc3863 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -116,7 +116,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; - if (!hdr || issue_flags & IO_URING_F_UNLOCKED) + if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED) return; /* Let normal cleanup path reap it if we fail adding to the cache */ @@ -152,9 +152,9 @@ static int io_setup_async_msg(struct io_kiocb *req, struct io_async_msghdr *kmsg, unsigned int issue_flags) { - struct io_async_msghdr *async_msg = req->async_data; + struct io_async_msghdr *async_msg; - if (async_msg) + if (req_has_async_data(req)) return -EAGAIN; async_msg = io_recvmsg_alloc_async(req, issue_flags); if (!async_msg) { @@ -977,6 +977,14 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) msg.msg_controllen = 0; msg.msg_namelen = 0; + if (zc->addr) { + ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); + if (unlikely(ret < 0)) + return ret; + msg.msg_name = (struct sockaddr *)&address; + msg.msg_namelen = zc->addr_len; + } + if (zc->flags & IORING_RECVSEND_FIXED_BUF) { ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu, (u64)(uintptr_t)zc->buf, zc->len); @@ -992,14 +1000,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) return ret; } - if (zc->addr) { - ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address); - if (unlikely(ret < 0)) - return ret; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = zc->addr_len; - } - msg_flags = zc->msg_flags | MSG_ZEROCOPY; if (issue_flags & IO_URING_F_NONBLOCK) msg_flags |= MSG_DONTWAIT; diff --git a/io_uring/notif.h b/io_uring/notif.h index 65f0b42f2555..80f6445e0c2b 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -8,7 +8,7 @@ #include "rsrc.h" #define IO_NOTIF_SPLICE_BATCH 32 -#define IORING_MAX_NOTIF_SLOTS (1U << 10) +#define IORING_MAX_NOTIF_SLOTS (1U << 15) struct io_notif_data { struct file *file; diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 85fa9dbfa8bf..82c61612f382 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk) struct sock __rcu **socks; write_lock_bh(&sk->sk_callback_lock); - socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); + socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); if (socks) { WRITE_ONCE(sk->sk_user_data, NULL); /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bc921a3f7ea8..126c769d36c3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2974,6 +2974,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_startup_enable(command); + /* + * If ftrace is in an undefined state, we just remove ops from list + * to prevent the NULL pointer, instead of totally rolling it back and + * free trampoline, because those actions could cause further damage. + */ + if (unlikely(ftrace_disabled)) { + __unregister_ftrace_function(ops); + return -ENODEV; + } + ops->flags &= ~FTRACE_OPS_FL_ADDING; return 0; diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 4a0e9d927443..1783e3478912 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -227,6 +227,7 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i) struct probe_arg *parg = &ep->tp.args[i]; struct ftrace_event_field *field; struct list_head *head; + int ret = -ENOENT; head = trace_get_fields(ep->event); list_for_each_entry(field, head, link) { @@ -236,9 +237,20 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i) return 0; } } + + /* + * Argument not found on event. But allow for comm and COMM + * to be used to get the current->comm. + */ + if (strcmp(parg->code->data, "COMM") == 0 || + strcmp(parg->code->data, "comm") == 0) { + parg->code->op = FETCH_OP_COMM; + ret = 0; + } + kfree(parg->code->data); parg->code->data = NULL; - return -ENOENT; + return ret; } static int eprobe_event_define_fields(struct trace_event_call *event_call) @@ -311,6 +323,27 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec) addr = rec + field->offset; + if (is_string_field(field)) { + switch (field->filter_type) { + case FILTER_DYN_STRING: + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff)); + break; + case FILTER_RDYN_STRING: + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff)); + break; + case FILTER_STATIC_STRING: + val = (unsigned long)addr; + break; + case FILTER_PTR_STRING: + val = (unsigned long)(*(char *)addr); + break; + default: + WARN_ON_ONCE(1); + return 0; + } + return val; + } + switch (field->size) { case 1: if (field->is_signed) @@ -342,16 +375,38 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec) static int get_eprobe_size(struct trace_probe *tp, void *rec) { + struct fetch_insn *code; struct probe_arg *arg; int i, len, ret = 0; for (i = 0; i < tp->nr_args; i++) { arg = tp->args + i; - if (unlikely(arg->dynamic)) { + if (arg->dynamic) { unsigned long val; - val = get_event_field(arg->code, rec); - len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL); + code = arg->code; + retry: + switch (code->op) { + case FETCH_OP_TP_ARG: + val = get_event_field(code, rec); + break; + case FETCH_OP_IMM: + val = code->immediate; + break; + case FETCH_OP_COMM: + val = (unsigned long)current->comm; + break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ + code++; + goto retry; + default: + continue; + } + code++; + len = process_fetch_insn_bottom(code, val, NULL, NULL); if (len > 0) ret += len; } @@ -369,8 +424,28 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, { unsigned long val; - val = get_event_field(code, rec); - return process_fetch_insn_bottom(code + 1, val, dest, base); + retry: + switch (code->op) { + case FETCH_OP_TP_ARG: + val = get_event_field(code, rec); + break; + case FETCH_OP_IMM: + val = code->immediate; + break; + case FETCH_OP_COMM: + val = (unsigned long)current->comm; + break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ + code++; + goto retry; + default: + return -EILSEQ; + } + code++; + return process_fetch_insn_bottom(code, val, dest, base); } NOKPROBE_SYMBOL(process_fetch_insn) @@ -845,6 +920,10 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ trace_probe_log_err(0, BAD_ATTACH_ARG); } + /* Handle symbols "@" */ + if (!ret) + ret = traceprobe_update_arg(&ep->tp.args[i]); + return ret; } @@ -883,7 +962,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(1); sys_event = argv[1]; ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); - if (!sys_event || !sys_name) { + if (ret || !sys_event || !sys_name) { trace_probe_log_err(0, NO_EVENT_INFO); goto parse_error; } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a114549720d6..61e3a2620fa3 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -157,7 +157,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event) int i; if (--tp_event->perf_refcount > 0) - goto out; + return; tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL); @@ -176,8 +176,6 @@ static void perf_trace_event_unreg(struct perf_event *p_event) perf_trace_buf[i] = NULL; } } -out: - trace_event_put_ref(tp_event); } static int perf_trace_event_open(struct perf_event *p_event) @@ -241,6 +239,7 @@ void perf_trace_destroy(struct perf_event *p_event) mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); + trace_event_put_ref(p_event->tp_event); mutex_unlock(&event_mutex); } @@ -292,6 +291,7 @@ void perf_kprobe_destroy(struct perf_event *p_event) mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); + trace_event_put_ref(p_event->tp_event); mutex_unlock(&event_mutex); destroy_local_trace_kprobe(p_event->tp_event); @@ -347,6 +347,7 @@ void perf_uprobe_destroy(struct perf_event *p_event) mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); + trace_event_put_ref(p_event->tp_event); mutex_unlock(&event_mutex); destroy_local_trace_uprobe(p_event->tp_event); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 181f08186d32..0356cae0cf74 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -176,6 +176,7 @@ static int trace_define_generic_fields(void) __generic_field(int, CPU, FILTER_CPU); __generic_field(int, cpu, FILTER_CPU); + __generic_field(int, common_cpu, FILTER_CPU); __generic_field(char *, COMM, FILTER_COMM); __generic_field(char *, comm, FILTER_COMM); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 850a88abd33b..36dff277de46 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -283,7 +283,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, int ret = 0; int len; - if (strcmp(arg, "retval") == 0) { + if (flags & TPARG_FL_TPOINT) { + if (code->data) + return -EFAULT; + code->data = kstrdup(arg, GFP_KERNEL); + if (!code->data) + return -ENOMEM; + code->op = FETCH_OP_TP_ARG; + } else if (strcmp(arg, "retval") == 0) { if (flags & TPARG_FL_RETURN) { code->op = FETCH_OP_RETVAL; } else { @@ -307,7 +314,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, } } else goto inval_var; - } else if (strcmp(arg, "comm") == 0) { + } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) { code->op = FETCH_OP_COMM; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API } else if (((flags & TPARG_FL_MASK) == @@ -323,13 +330,6 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, code->op = FETCH_OP_ARG; code->param = (unsigned int)param - 1; #endif - } else if (flags & TPARG_FL_TPOINT) { - if (code->data) - return -EFAULT; - code->data = kstrdup(arg, GFP_KERNEL); - if (!code->data) - return -ENOMEM; - code->op = FETCH_OP_TP_ARG; } else goto inval_var; @@ -384,6 +384,11 @@ parse_probe_arg(char *arg, const struct fetch_type *type, break; case '%': /* named register */ + if (flags & TPARG_FL_TPOINT) { + /* eprobes do not handle registers */ + trace_probe_log_err(offs, BAD_VAR); + break; + } ret = regs_query_register_offset(arg + 1); if (ret >= 0) { code->op = FETCH_OP_REG; @@ -617,9 +622,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, /* * Since $comm and immediate string can not be dereferenced, - * we can find those by strcmp. + * we can find those by strcmp. But ignore for eprobes. */ - if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) { + if (!(flags & TPARG_FL_TPOINT) && + (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 || + strncmp(arg, "\\\"", 2) == 0)) { /* The type of $comm must be "string", and not an array. */ if (parg->count || (t && strcmp(t, "string"))) goto out; diff --git a/lib/Makefile b/lib/Makefile index c95212141928..5927d7fa0806 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,9 +34,10 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o cpumask.o + buildid.o lib-$(CONFIG_PRINTK) += dump_stack.o +lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o diff --git a/lib/cpumask.c b/lib/cpumask.c index 8baeb37e23d3..f0ae119be8c4 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -109,7 +109,6 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) } #endif -#if NR_CPUS > 1 /** * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number @@ -197,4 +196,3 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp) return next; } EXPORT_SYMBOL(cpumask_any_distribute); -#endif /* NR_CPUS */ diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index a10335b4ba2d..c8d137ef5980 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -345,7 +345,7 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - qstats->qlen += qcpu->backlog; + qstats->qlen += qcpu->qlen; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6a8c2596ebab..5b669eb80270 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,14 +307,32 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list) +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + unsigned long flags; struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); - kfree_skb(skb); + spin_lock_irqsave(&list->lock, flags); + skb = skb_peek(list); + while (skb != NULL) { + struct sk_buff *skb_next = skb_peek_next(skb, list); + struct net_device *dev = skb->dev; + if (net == NULL || net_eq(dev_net(dev), net)) { + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); + __skb_unlink(skb, list); + + dev_put(dev); + kfree_skb(skb); + } + skb = skb_next; } + spin_unlock_irqrestore(&list->lock, flags); } static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, @@ -385,9 +403,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - - del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + if (skb_queue_empty_lockless(&tbl->proxy_queue)) + del_timer_sync(&tbl->proxy_timer); return 0; } @@ -1597,8 +1615,15 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); __skb_unlink(skb, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) { rcu_read_lock(); tbl->proxy_redo(skb); @@ -1623,7 +1648,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, unsigned long sched_next = jiffies + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); - if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { + if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1639,6 +1664,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); + p->qlen++; mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } @@ -1671,6 +1697,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + p->qlen = 0; netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); @@ -1736,6 +1763,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); + tbl->parms.qlen = 0; tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) @@ -1787,7 +1815,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, NULL); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ac45328607f7..4b5b15c684ed 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6070,6 +6070,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); + module_put(owner); goto err_unlock; } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f47338d89d5d..59e75ffcc1f4 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1194,8 +1194,9 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); } - if (sk_psock_verdict_apply(psock, skb, ret) < 0) - len = 0; + ret = sk_psock_verdict_apply(psock, skb, ret); + if (ret < 0) + len = ret; out: rcu_read_unlock(); return len; diff --git a/net/dsa/port.c b/net/dsa/port.c index 2dd76eb1621c..a8895ee3cd60 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -145,11 +145,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, bool do_fast_age) { + struct dsa_switch *ds = dp->ds; int err; err = dsa_port_set_state(dp, state, do_fast_age); - if (err) - pr_err("DSA: failed to set STP state %u (%d)\n", state, err); + if (err && err != -EOPNOTSUPP) { + dev_err(ds->dev, "port %d failed to set STP state %u: %pe\n", + dp->index, state, ERR_PTR(err)); + } } int dsa_port_set_mst_state(struct dsa_port *dp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 970e9a2cca4a..bbe218753662 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1567,17 +1567,11 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) * calculation of whether or not we must ACK for the sake of * a window update. */ -void tcp_cleanup_rbuf(struct sock *sk, int copied) +static void __tcp_cleanup_rbuf(struct sock *sk, int copied) { struct tcp_sock *tp = tcp_sk(sk); bool time_to_ack = false; - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - - WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), - "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", - tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); - if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1623,6 +1617,17 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } +void tcp_cleanup_rbuf(struct sock *sk, int copied) +{ + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct tcp_sock *tp = tcp_sk(sk); + + WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), + "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", + tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); + __tcp_cleanup_rbuf(sk, copied); +} + static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); @@ -1756,34 +1761,26 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; - while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { - int used; - - __skb_unlink(skb, &sk->sk_receive_queue); - used = recv_actor(sk, skb); - if (used <= 0) { - if (!copied) - copied = used; - break; - } - seq += used; - copied += used; + skb = tcp_recv_skb(sk, seq, &offset); + if (!skb) + return 0; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { - consume_skb(skb); + __skb_unlink(skb, &sk->sk_receive_queue); + WARN_ON(!skb_set_owner_sk_safe(skb, sk)); + copied = recv_actor(sk, skb); + if (copied >= 0) { + seq += copied; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ++seq; - break; - } - consume_skb(skb); - break; } + consume_skb(skb); WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ if (copied > 0) - tcp_cleanup_rbuf(sk, copied); + __tcp_cleanup_rbuf(sk, copied); return copied; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3fda5634578c..79c6a827dea9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1517,7 +1517,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) * ip6_tnl_change() updates the tunnel parameters **/ -static int +static void ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; @@ -1531,29 +1531,25 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); - return 0; } -static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - int err; ip6_tnl_unlink(ip6n, t); synchronize_net(); - err = ip6_tnl_change(t, p); + ip6_tnl_change(t, p); ip6_tnl_link(ip6n, t); netdev_state_change(t->dev); - return err; } -static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { /* for default tnl0 device allow to change only the proto */ t->parms.proto = p->proto; netdev_state_change(t->dev); - return 0; } static void @@ -1667,9 +1663,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, } else t = netdev_priv(dev); if (dev == ip6n->fb_tnl_dev) - err = ip6_tnl0_update(t, &p1); + ip6_tnl0_update(t, &p1); else - err = ip6_tnl_update(t, &p1); + ip6_tnl_update(t, &p1); } if (!IS_ERR(t)) { err = 0; @@ -2091,7 +2087,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], } else t = netdev_priv(dev); - return ip6_tnl_update(t, &p); + ip6_tnl_update(t, &p); + return 0; } static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 98453693e400..3a553494ff16 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1378,6 +1378,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); + if (neigh) + neigh_release(neigh); + rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev, pref, defrtr_usr_metric); if (!rt) { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 22f15ebf6045..4b8d04640ff3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -144,7 +144,6 @@ config NF_CONNTRACK_ZONES config NF_CONNTRACK_PROCFS bool "Supply CT list in procfs (OBSOLETE)" - default y depends on PROC_FS help This option enables for the list of known conntrack entries diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index a414274338cf..0d9332e9cf71 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -34,11 +34,6 @@ MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); -/* This is slow, but it's simple. --RR */ -static char *ftp_buffer; - -static DEFINE_SPINLOCK(nf_ftp_lock); - #define MAX_PORTS 8 static u_int16_t ports[MAX_PORTS]; static unsigned int ports_c; @@ -398,6 +393,9 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; } + if (unlikely(skb_linearize(skb))) + return NF_DROP; + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; @@ -411,12 +409,8 @@ static int help(struct sk_buff *skb, } datalen = skb->len - dataoff; - spin_lock_bh(&nf_ftp_lock); - fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); - if (!fb_ptr) { - spin_unlock_bh(&nf_ftp_lock); - return NF_ACCEPT; - } + spin_lock_bh(&ct->lock); + fb_ptr = skb->data + dataoff; ends_in_nl = (fb_ptr[datalen - 1] == '\n'); seq = ntohl(th->seq) + datalen; @@ -544,7 +538,7 @@ out_update_nl: if (ends_in_nl) update_nl_seq(ct, seq, ct_ftp_info, dir, skb); out: - spin_unlock_bh(&nf_ftp_lock); + spin_unlock_bh(&ct->lock); return ret; } @@ -571,7 +565,6 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = { static void __exit nf_conntrack_ftp_fini(void) { nf_conntrack_helpers_unregister(ftp, ports_c * 2); - kfree(ftp_buffer); } static int __init nf_conntrack_ftp_init(void) @@ -580,10 +573,6 @@ static int __init nf_conntrack_ftp_init(void) NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master)); - ftp_buffer = kmalloc(65536, GFP_KERNEL); - if (!ftp_buffer) - return -ENOMEM; - if (ports_c == 0) ports[ports_c++] = FTP_PORT; @@ -603,7 +592,6 @@ static int __init nf_conntrack_ftp_init(void) ret = nf_conntrack_helpers_register(ftp, ports_c * 2); if (ret < 0) { pr_err("failed to register helpers\n"); - kfree(ftp_buffer); return ret; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bb76305bb7ff..5a9bce24f3c3 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -34,6 +34,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_h323.h> +#define H323_MAX_SIZE 65535 + /* Parameters */ static unsigned int default_rrq_ttl __read_mostly = 300; module_param(default_rrq_ttl, uint, 0600); @@ -86,6 +88,9 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, if (tcpdatalen <= 0) /* No TCP data */ goto clear_out; + if (tcpdatalen > H323_MAX_SIZE) + tcpdatalen = H323_MAX_SIZE; + if (*data == NULL) { /* first TPKT */ /* Get first TPKT pointer */ tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, @@ -1169,6 +1174,9 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NULL; *datalen = skb->len - dataoff; + if (*datalen > H323_MAX_SIZE) + *datalen = H323_MAX_SIZE; + return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } @@ -1770,7 +1778,7 @@ static int __init nf_conntrack_h323_init(void) NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master)); - h323_buffer = kmalloc(65536, GFP_KERNEL); + h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; ret = h323_helper_init(); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 08ee4e760a3d..1796c456ac98 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -39,6 +39,7 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_irc_hook); #define HELPER_NAME "irc" +#define MAX_SEARCH_SIZE 4095 MODULE_AUTHOR("Harald Welte <[email protected]>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); @@ -121,6 +122,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, int i, ret = NF_ACCEPT; char *addr_beg_p, *addr_end_p; typeof(nf_nat_irc_hook) nf_nat_irc; + unsigned int datalen; /* If packet is coming from IRC server */ if (dir == IP_CT_DIR_REPLY) @@ -140,8 +142,12 @@ static int help(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NF_ACCEPT; + datalen = skb->len - dataoff; + if (datalen > MAX_SEARCH_SIZE) + datalen = MAX_SEARCH_SIZE; + spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, + ib_ptr = skb_header_pointer(skb, dataoff, datalen, irc_buffer); if (!ib_ptr) { spin_unlock_bh(&irc_buffer_lock); @@ -149,7 +155,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, } data = ib_ptr; - data_limit = ib_ptr + skb->len - dataoff; + data_limit = ib_ptr + datalen; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -251,7 +257,7 @@ static int __init nf_conntrack_irc_init(void) irc_exp_policy.max_expected = max_dcc_channels; irc_exp_policy.timeout = dcc_timeout; - irc_buffer = kmalloc(65536, GFP_KERNEL); + irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL); if (!irc_buffer) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index fcb33b1d5456..13dc421fc4f5 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -34,10 +34,6 @@ MODULE_AUTHOR("Michal Schmidt <[email protected]>"); MODULE_DESCRIPTION("SANE connection tracking helper"); MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); -static char *sane_buffer; - -static DEFINE_SPINLOCK(nf_sane_lock); - #define MAX_PORTS 8 static u_int16_t ports[MAX_PORTS]; static unsigned int ports_c; @@ -67,14 +63,16 @@ static int help(struct sk_buff *skb, unsigned int dataoff, datalen; const struct tcphdr *th; struct tcphdr _tcph; - void *sb_ptr; int ret = NF_ACCEPT; int dir = CTINFO2DIR(ctinfo); struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; - struct sane_request *req; struct sane_reply_net_start *reply; + union { + struct sane_request req; + struct sane_reply_net_start repl; + } buf; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && @@ -92,59 +90,62 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; datalen = skb->len - dataoff; - - spin_lock_bh(&nf_sane_lock); - sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); - if (!sb_ptr) { - spin_unlock_bh(&nf_sane_lock); - return NF_ACCEPT; - } - if (dir == IP_CT_DIR_ORIGINAL) { + const struct sane_request *req; + if (datalen != sizeof(struct sane_request)) - goto out; + return NF_ACCEPT; + + req = skb_header_pointer(skb, dataoff, datalen, &buf.req); + if (!req) + return NF_ACCEPT; - req = sb_ptr; if (req->RPC_code != htonl(SANE_NET_START)) { /* Not an interesting command */ - ct_sane_info->state = SANE_STATE_NORMAL; - goto out; + WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL); + return NF_ACCEPT; } /* We're interested in the next reply */ - ct_sane_info->state = SANE_STATE_START_REQUESTED; - goto out; + WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED); + return NF_ACCEPT; } + /* IP_CT_DIR_REPLY */ + /* Is it a reply to an uninteresting command? */ - if (ct_sane_info->state != SANE_STATE_START_REQUESTED) - goto out; + if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED) + return NF_ACCEPT; /* It's a reply to SANE_NET_START. */ - ct_sane_info->state = SANE_STATE_NORMAL; + WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL); if (datalen < sizeof(struct sane_reply_net_start)) { pr_debug("NET_START reply too short\n"); - goto out; + return NF_ACCEPT; } - reply = sb_ptr; + datalen = sizeof(struct sane_reply_net_start); + + reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl); + if (!reply) + return NF_ACCEPT; + if (reply->status != htonl(SANE_STATUS_SUCCESS)) { /* saned refused the command */ pr_debug("unsuccessful SANE_STATUS = %u\n", ntohl(reply->status)); - goto out; + return NF_ACCEPT; } /* Invalid saned reply? Ignore it. */ if (reply->zero != 0) - goto out; + return NF_ACCEPT; exp = nf_ct_expect_alloc(ct); if (exp == NULL) { nf_ct_helper_log(skb, ct, "cannot alloc expectation"); - ret = NF_DROP; - goto out; + return NF_DROP; } tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; @@ -162,9 +163,6 @@ static int help(struct sk_buff *skb, } nf_ct_expect_put(exp); - -out: - spin_unlock_bh(&nf_sane_lock); return ret; } @@ -178,7 +176,6 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = { static void __exit nf_conntrack_sane_fini(void) { nf_conntrack_helpers_unregister(sane, ports_c * 2); - kfree(sane_buffer); } static int __init nf_conntrack_sane_init(void) @@ -187,10 +184,6 @@ static int __init nf_conntrack_sane_init(void) NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master)); - sane_buffer = kmalloc(65536, GFP_KERNEL); - if (!sane_buffer) - return -ENOMEM; - if (ports_c == 0) ports[ports_c++] = SANE_PORT; @@ -210,7 +203,6 @@ static int __init nf_conntrack_sane_init(void) ret = nf_conntrack_helpers_register(sane, ports_c * 2); if (ret < 0) { pr_err("failed to register helpers\n"); - kfree(sane_buffer); return ret; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3cc88998b879..62cfb0e31c40 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -889,7 +889,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -1705,7 +1705,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -3149,7 +3149,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -3907,7 +3907,7 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; - if (!nft_is_active_next(ctx->net, set)) + if (!nft_is_active_next(ctx->net, i)) continue; if (!sscanf(i->name, name, &tmp)) continue; @@ -4133,7 +4133,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && @@ -4451,6 +4451,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; + + if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT)) + return -EINVAL; + } else if (flags & NFT_SET_CONCAT) { + return -EINVAL; } if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS]) @@ -5061,6 +5066,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); + cb->seq = READ_ONCE(nft_net->base_seq); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) @@ -5196,6 +5203,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set, if (!(set->flags & NFT_SET_INTERVAL) && *flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; + if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) == + (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) + return -EINVAL; return 0; } @@ -5599,7 +5609,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, err = nft_expr_clone(expr, set->exprs[i]); if (err < 0) { - nft_expr_destroy(ctx, expr); + kfree(expr); goto err_expr; } expr_array[i] = expr; @@ -5842,6 +5852,24 @@ static void nft_setelem_remove(const struct net *net, set->ops->remove(net, set, elem); } +static bool nft_setelem_valid_key_end(const struct nft_set *set, + struct nlattr **nla, u32 flags) +{ + if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) == + (NFT_SET_CONCAT | NFT_SET_INTERVAL)) { + if (flags & NFT_SET_ELEM_INTERVAL_END) + return false; + if (!nla[NFTA_SET_ELEM_KEY_END] && + !(flags & NFT_SET_ELEM_CATCHALL)) + return false; + } else { + if (nla[NFTA_SET_ELEM_KEY_END]) + return false; + } + + return true; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { @@ -5892,6 +5920,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } + if (set->flags & NFT_SET_OBJECT) { + if (!nla[NFTA_SET_ELEM_OBJREF] && + !(flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_OBJREF]) + return -EINVAL; + } + + if (!nft_setelem_valid_key_end(set, nla, flags)) + return -EINVAL; + if ((flags & NFT_SET_ELEM_INTERVAL_END) && (nla[NFTA_SET_ELEM_DATA] || nla[NFTA_SET_ELEM_OBJREF] || @@ -5899,6 +5939,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nla[NFTA_SET_ELEM_EXPIRATION] || nla[NFTA_SET_ELEM_USERDATA] || nla[NFTA_SET_ELEM_EXPR] || + nla[NFTA_SET_ELEM_KEY_END] || nla[NFTA_SET_ELEM_EXPRESSIONS])) return -EINVAL; @@ -6029,10 +6070,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { - if (!(set->flags & NFT_SET_OBJECT)) { - err = -EINVAL; - goto err_parse_key_end; - } obj = nft_obj_lookup(ctx->net, ctx->table, nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); @@ -6325,6 +6362,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; + if (!nft_setelem_valid_key_end(set, nla, flags)) + return -EINVAL; + nft_set_ext_prepare(&tmpl); if (flags != 0) { @@ -6941,7 +6981,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -7873,7 +7913,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = nft_net->base_seq; + cb->seq = READ_ONCE(nft_net->base_seq); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -8806,6 +8846,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + unsigned int base_seq; LIST_HEAD(adl); int err; @@ -8855,9 +8896,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++nft_net->base_seq == 0) + base_seq = READ_ONCE(nft_net->base_seq); + while (++base_seq == 0) ; + WRITE_ONCE(nft_net->base_seq, base_seq); + /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); @@ -9419,13 +9463,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, break; } } - - cond_resched(); } list_for_each_entry(set, &ctx->table->sets, list) { - cond_resched(); - if (!nft_is_active_next(ctx->net, set)) continue; if (!(set->flags & NFT_SET_MAP) || diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c24b1240908f..9c44518cb70f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static DEFINE_SPINLOCK(nfnl_grp_active_lock); +#endif + struct nfnl_net { struct sock *nfnl; }; @@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } +static void nfnetlink_bind_event(struct net *net, unsigned int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + u8 v; + + /* All NFNLGRP_CONNTRACK_* group bits fit into u8. + * The other groups are not relevant and can be ignored. + */ + if (group >= 8) + return; + + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + v = READ_ONCE(net->ct.ctnetlink_has_listener); + if ((v & group_bit) == 0) { + v |= group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); + } + + spin_unlock(&nfnl_grp_active_lock); +#endif +} + static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group) if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - if (type == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - } -#endif + nfnetlink_bind_event(net, group); return 0; } static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return; - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - if (!nfnetlink_has_listeners(net, group)) - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + /* ctnetlink_has_listener is u8 */ + if (group >= 8) + return; + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + if (!nfnetlink_has_listeners(net, group)) { + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); + + v &= ~group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); } + spin_unlock(&nfnl_grp_active_lock); #endif } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1afca2a6c2ac..57010927e20a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1174,13 +1174,17 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb) op.policy, op.maxattr); if (err) - return err; + goto err_free_state; } } if (!ctx->state) return -ENODATA; return 0; + +err_free_state: + netlink_policy_dump_free(ctx->state); + return err; } static void *ctrl_dumppolicy_prep(struct sk_buff *skb, diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 8d7c900e27f4..87e3de0fde89 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -144,7 +144,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate, err = add_policy(&state, policy, maxtype); if (err) - return err; + goto err_try_undo; for (policy_idx = 0; policy_idx < state->n_alloc && state->policies[policy_idx].policy; @@ -164,7 +164,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate, policy[type].nested_policy, policy[type].len); if (err) - return err; + goto err_try_undo; break; default: break; @@ -174,6 +174,16 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate, *pstate = state; return 0; + +err_try_undo: + /* Try to preserve reasonable unwind semantics - if we're starting from + * scratch clean up fully, otherwise record what we got and caller will. + */ + if (!*pstate) + netlink_policy_dump_free(state); + else + *pstate = state; + return err; } static bool diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index 18196e1c8c2f..9ced13c0627a 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -78,11 +78,6 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, struct qrtr_mhi_dev *qdev; int rc; - /* start channels */ - rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); - if (rc) - return rc; - qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return -ENOMEM; @@ -96,6 +91,13 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + return rc; + } + dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 6fdedd9dbbc2..cfbf0e129cba 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -363,6 +363,7 @@ static int acquire_refill(struct rds_connection *conn) static void release_refill(struct rds_connection *conn) { clear_bit(RDS_RECV_REFILL, &conn->c_flags); + smp_mb__after_atomic(); /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 3f935cbbaff6..48712bc51bda 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -424,6 +424,11 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return -EINVAL; } + if (!nhandle) { + NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid"); + return -EINVAL; + } + h1 = to_hash(nhandle); b = rtnl_dereference(head->table[h1]); if (!b) { @@ -477,6 +482,11 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, int err; bool new = true; + if (!handle) { + NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid"); + return -EINVAL; + } + if (opt == NULL) return handle ? -EINVAL : 0; diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 7330eb9a70cf..c65c90ad626a 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -291,8 +291,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, int offline = 0, online = 0, remove = 0; struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj); - if (!xprt) - return 0; + if (!xprt || !xps) { + count = 0; + goto out_put; + } if (!strncmp(buf, "offline", 7)) offline = 1; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f76119f62f1b..fe27241cd13f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2702,7 +2702,9 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); - tls_strp_init(&sw_ctx_rx->strp, sk); + rc = tls_strp_init(&sw_ctx_rx->strp, sk); + if (rc) + goto free_aead; } goto out; diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index f5f0d6f09053..0621c39a3955 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -49,7 +49,6 @@ ifdef CONFIG_CC_IS_CLANG KBUILD_CFLAGS += -Wno-initializer-overrides KBUILD_CFLAGS += -Wno-format KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-format-zero-length KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 692d64a70542..e4deaf5fa571 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -4,7 +4,7 @@ gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += latent_entropy_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) \ += -DLATENT_ENTROPY_PLUGIN ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY - DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable + DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable -ULATENT_ENTROPY_PLUGIN endif export DISABLE_LATENT_ENTROPY_PLUGIN diff --git a/scripts/clang-tools/run-clang-tools.py b/scripts/clang-tools/run-clang-tools.py index f754415af398..1337cedca096 100755 --- a/scripts/clang-tools/run-clang-tools.py +++ b/scripts/clang-tools/run-clang-tools.py @@ -51,6 +51,7 @@ def run_analysis(entry): checks += "linuxkernel-*" else: checks += "clang-analyzer-*" + checks += ",-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling" p = subprocess.run(["clang-tidy", "-p", args.path, checks, entry["file"]], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 7db825843435..1db1889f6d81 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -59,7 +59,7 @@ fi if arg_contain -E "$@"; then # For scripts/cc-version.sh; This emulates GCC 20.0.0 if arg_contain - "$@"; then - sed -n '/^GCC/{s/__GNUC__/20/; s/__GNUC_MINOR__/0/; s/__GNUC_PATCHLEVEL__/0/; p;}' + sed -n '/^GCC/{s/__GNUC__/20/; s/__GNUC_MINOR__/0/; s/__GNUC_PATCHLEVEL__/0/; p;}; s/__LONG_DOUBLE_128__/1/ p' exit 0 else echo "no input files" >&2 diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh deleted file mode 100755 index 8b980fb2270a..000000000000 --- a/scripts/gcc-goto.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Test for gcc 'asm goto' support -# Copyright (C) 2010, Jason Baron <[email protected]> - -cat << "END" | $@ -x c - -fno-PIE -c -o /dev/null -int main(void) -{ -#if defined(__arm__) || defined(__aarch64__) - /* - * Not related to asm goto, but used by jump label - * and broken on some ARM GCC versions (see GCC Bug 48637). - */ - static struct { int dummy; int state; } tp; - asm (".long %c0" :: "i" (&tp.state)); -#endif - -entry: - asm goto ("" :::: entry); - return 0; -} -END diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 55e32af2e53f..2c80da0220c3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2021,13 +2021,11 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod) /* record CRCs for exported symbols */ buf_printf(buf, "\n"); list_for_each_entry(sym, &mod->exported_symbols, list) { - if (!sym->crc_valid) { + if (!sym->crc_valid) warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n" "Is \"%s\" prototyped in <asm/asm-prototypes.h>?\n", sym->name, mod->name, mod->is_vmlinux ? "" : ".ko", sym->name); - continue; - } buf_printf(buf, "SYMBOL_CRC(%s, 0x%08x, \"%s\");\n", sym->name, sym->crc, sym->is_gpl_only ? "_gpl" : ""); diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 6ab5f2bbf41f..44521582dcba 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -356,13 +356,11 @@ static long dm_verity_ioctl(struct file *filp, unsigned int cmd, unsigned long a { void __user *uarg = (void __user *)arg; unsigned int fd; - int rc; switch (cmd) { case LOADPIN_IOC_SET_TRUSTED_VERITY_DIGESTS: - rc = copy_from_user(&fd, uarg, sizeof(fd)); - if (rc) - return rc; + if (copy_from_user(&fd, uarg, sizeof(fd))) + return -EFAULT; return read_trusted_verity_root_digests(fd); diff --git a/sound/core/info.c b/sound/core/info.c index b8058b341178..0b2f04dcb589 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -111,9 +111,9 @@ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig) entry = data->entry; mutex_lock(&entry->access); if (entry->c.ops->llseek) { - offset = entry->c.ops->llseek(entry, - data->file_private_data, - file, offset, orig); + ret = entry->c.ops->llseek(entry, + data->file_private_data, + file, offset, orig); goto out; } diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 129bffb431c2..15e2a0009080 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -1163,6 +1163,11 @@ static int cs35l41_no_acpi_dsd(struct cs35l41_hda *cs35l41, struct device *physd hw_cfg->gpio1.func = CS35l41_VSPK_SWITCH; hw_cfg->gpio1.valid = true; } else { + /* + * Note: CLSA010(0/1) are special cases which use a slightly different design. + * All other HIDs e.g. CSC3551 require valid ACPI _DSD properties to be supported. + */ + dev_err(cs35l41->dev, "Error: ACPI _DSD Properties are missing for HID %s.\n", hid); hw_cfg->valid = false; hw_cfg->gpio1.valid = false; hw_cfg->gpio2.valid = false; diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index e0d3a8be2e38..b288874e401e 100644 --- a/sound/pci/hda/patch_cs8409-tables.c +++ b/sound/pci/hda/patch_cs8409-tables.c @@ -546,6 +546,10 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0BD6, "Dolphin", CS8409_DOLPHIN), SND_PCI_QUIRK(0x1028, 0x0BD7, "Dolphin", CS8409_DOLPHIN), SND_PCI_QUIRK(0x1028, 0x0BD8, "Dolphin", CS8409_DOLPHIN), + SND_PCI_QUIRK(0x1028, 0x0C43, "Dolphin", CS8409_DOLPHIN), + SND_PCI_QUIRK(0x1028, 0x0C50, "Dolphin", CS8409_DOLPHIN), + SND_PCI_QUIRK(0x1028, 0x0C51, "Dolphin", CS8409_DOLPHIN), + SND_PCI_QUIRK(0x1028, 0x0C52, "Dolphin", CS8409_DOLPHIN), {} /* terminator */ }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fd630d62b5a0..47e72cf76608 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9283,6 +9283,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -9303,6 +9304,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -9389,6 +9391,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x7717, "Clevo NS70PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -9490,6 +9493,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), + SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index ecfe7a790790..e0b24e1daef3 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -143,6 +143,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21CL"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21EM"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21EN"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J5"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), + } + }, {} }; diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 38ab8d4291c2..5a844329800f 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -1986,7 +1986,7 @@ static int rt5640_set_bias_level(struct snd_soc_component *component, snd_soc_component_write(component, RT5640_PWR_MIXER, 0x0000); if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) snd_soc_component_write(component, RT5640_PWR_ANLG1, - 0x0018); + 0x2818); else snd_soc_component_write(component, RT5640_PWR_ANLG1, 0x0000); @@ -2600,7 +2600,8 @@ static void rt5640_enable_hda_jack_detect( snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0); snd_soc_component_update_bits(component, RT5640_PWR_ANLG1, - RT5640_PWR_VREF2, RT5640_PWR_VREF2); + RT5640_PWR_VREF2 | RT5640_PWR_MB | RT5640_PWR_BG, + RT5640_PWR_VREF2 | RT5640_PWR_MB | RT5640_PWR_BG); usleep_range(10000, 15000); snd_soc_component_update_bits(component, RT5640_PWR_ANLG1, RT5640_PWR_FV2, RT5640_PWR_FV2); diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 3cb634c28261..bb653b664146 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -46,34 +46,22 @@ static void tas2770_reset(struct tas2770_priv *tas2770) usleep_range(1000, 2000); } -static int tas2770_set_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) +static int tas2770_update_pwr_ctrl(struct tas2770_priv *tas2770) { - struct tas2770_priv *tas2770 = - snd_soc_component_get_drvdata(component); + struct snd_soc_component *component = tas2770->component; + unsigned int val; + int ret; - switch (level) { - case SND_SOC_BIAS_ON: - snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_ACTIVE); - break; - case SND_SOC_BIAS_STANDBY: - case SND_SOC_BIAS_PREPARE: - snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_MUTE); - break; - case SND_SOC_BIAS_OFF: - snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_SHUTDOWN); - break; + if (tas2770->dac_powered) + val = tas2770->unmuted ? + TAS2770_PWR_CTRL_ACTIVE : TAS2770_PWR_CTRL_MUTE; + else + val = TAS2770_PWR_CTRL_SHUTDOWN; - default: - dev_err(tas2770->dev, "wrong power level setting %d\n", level); - return -EINVAL; - } + ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, + TAS2770_PWR_CTRL_MASK, val); + if (ret < 0) + return ret; return 0; } @@ -114,9 +102,7 @@ static int tas2770_codec_resume(struct snd_soc_component *component) gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); usleep_range(1000, 2000); } else { - ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_ACTIVE); + ret = tas2770_update_pwr_ctrl(tas2770); if (ret < 0) return ret; } @@ -152,24 +138,19 @@ static int tas2770_dac_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: - ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_MUTE); + tas2770->dac_powered = 1; + ret = tas2770_update_pwr_ctrl(tas2770); break; case SND_SOC_DAPM_PRE_PMD: - ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_SHUTDOWN); + tas2770->dac_powered = 0; + ret = tas2770_update_pwr_ctrl(tas2770); break; default: dev_err(tas2770->dev, "Not supported evevt\n"); return -EINVAL; } - if (ret < 0) - return ret; - - return 0; + return ret; } static const struct snd_kcontrol_new isense_switch = @@ -203,21 +184,11 @@ static const struct snd_soc_dapm_route tas2770_audio_map[] = { static int tas2770_mute(struct snd_soc_dai *dai, int mute, int direction) { struct snd_soc_component *component = dai->component; - int ret; - - if (mute) - ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_MUTE); - else - ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, - TAS2770_PWR_CTRL_MASK, - TAS2770_PWR_CTRL_ACTIVE); - - if (ret < 0) - return ret; + struct tas2770_priv *tas2770 = + snd_soc_component_get_drvdata(component); - return 0; + tas2770->unmuted = !mute; + return tas2770_update_pwr_ctrl(tas2770); } static int tas2770_set_bitwidth(struct tas2770_priv *tas2770, int bitwidth) @@ -337,7 +308,7 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct snd_soc_component *component = dai->component; struct tas2770_priv *tas2770 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0; + u8 tdm_rx_start_slot = 0, invert_fpol = 0, fpol_preinv = 0, asi_cfg_1 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { @@ -349,9 +320,15 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) } switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_IF: + invert_fpol = 1; + fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 |= TAS2770_TDM_CFG_REG1_RX_RSING; break; + case SND_SOC_DAIFMT_IB_IF: + invert_fpol = 1; + fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 |= TAS2770_TDM_CFG_REG1_RX_FALING; break; @@ -369,15 +346,19 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: tdm_rx_start_slot = 1; + fpol_preinv = 0; break; case SND_SOC_DAIFMT_DSP_A: tdm_rx_start_slot = 0; + fpol_preinv = 1; break; case SND_SOC_DAIFMT_DSP_B: tdm_rx_start_slot = 1; + fpol_preinv = 1; break; case SND_SOC_DAIFMT_LEFT_J: tdm_rx_start_slot = 0; + fpol_preinv = 1; break; default: dev_err(tas2770->dev, @@ -391,6 +372,14 @@ static int tas2770_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (ret < 0) return ret; + ret = snd_soc_component_update_bits(component, TAS2770_TDM_CFG_REG0, + TAS2770_TDM_CFG_REG0_FPOL_MASK, + (fpol_preinv ^ invert_fpol) + ? TAS2770_TDM_CFG_REG0_FPOL_RSING + : TAS2770_TDM_CFG_REG0_FPOL_FALING); + if (ret < 0) + return ret; + return 0; } @@ -489,7 +478,7 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = { .id = 0, .playback = { .stream_name = "ASI1 Playback", - .channels_min = 2, + .channels_min = 1, .channels_max = 2, .rates = TAS2770_RATES, .formats = TAS2770_FORMATS, @@ -537,7 +526,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2770 = { .probe = tas2770_codec_probe, .suspend = tas2770_codec_suspend, .resume = tas2770_codec_resume, - .set_bias_level = tas2770_set_bias_level, .controls = tas2770_snd_controls, .num_controls = ARRAY_SIZE(tas2770_snd_controls), .dapm_widgets = tas2770_dapm_widgets, diff --git a/sound/soc/codecs/tas2770.h b/sound/soc/codecs/tas2770.h index d156666bcc55..f75f40781ab1 100644 --- a/sound/soc/codecs/tas2770.h +++ b/sound/soc/codecs/tas2770.h @@ -41,6 +41,9 @@ #define TAS2770_TDM_CFG_REG0_31_44_1_48KHZ 0x6 #define TAS2770_TDM_CFG_REG0_31_88_2_96KHZ 0x8 #define TAS2770_TDM_CFG_REG0_31_176_4_192KHZ 0xa +#define TAS2770_TDM_CFG_REG0_FPOL_MASK BIT(0) +#define TAS2770_TDM_CFG_REG0_FPOL_RSING 0 +#define TAS2770_TDM_CFG_REG0_FPOL_FALING 1 /* TDM Configuration Reg1 */ #define TAS2770_TDM_CFG_REG1 TAS2770_REG(0X0, 0x0B) #define TAS2770_TDM_CFG_REG1_MASK GENMASK(5, 1) @@ -135,6 +138,8 @@ struct tas2770_priv { struct device *dev; int v_sense_slot; int i_sense_slot; + bool dac_powered; + bool unmuted; }; #endif /* __TAS2770__ */ diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 4b74805cdd2e..ffe1828a4b7e 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -49,6 +49,8 @@ struct aic32x4_priv { struct aic32x4_setup_data *setup; struct device *dev; enum aic32x4_type type; + + unsigned int fmt; }; static int aic32x4_reset_adc(struct snd_soc_dapm_widget *w, @@ -611,6 +613,7 @@ static int aic32x4_set_dai_sysclk(struct snd_soc_dai *codec_dai, static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_component *component = codec_dai->component; + struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component); u8 iface_reg_1 = 0; u8 iface_reg_2 = 0; u8 iface_reg_3 = 0; @@ -653,6 +656,8 @@ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return -EINVAL; } + aic32x4->fmt = fmt; + snd_soc_component_update_bits(component, AIC32X4_IFACE1, AIC32X4_IFACE1_DATATYPE_MASK | AIC32X4_IFACE1_MASTER_MASK, iface_reg_1); @@ -757,6 +762,10 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component, return -EINVAL; } + /* PCM over I2S is always 2-channel */ + if ((aic32x4->fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_I2S) + channels = 2; + madc = DIV_ROUND_UP((32 * adc_resource_class), aosr); max_dosr = (AIC32X4_MAX_DOSR_FREQ / sample_rate / dosr_increment) * dosr_increment; diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index f21b0cdd3206..8fe5917b1e26 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -636,8 +636,8 @@ static ssize_t topology_name_read(struct file *file, char __user *user_buf, size char buf[64]; size_t len; - len = snprintf(buf, sizeof(buf), "%s/%s\n", component->driver->topology_name_prefix, - mach->tplg_filename); + len = scnprintf(buf, sizeof(buf), "%s/%s\n", component->driver->topology_name_prefix, + mach->tplg_filename); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index c7f33c89588e..606cc3242a60 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -760,6 +760,9 @@ static int sof_es8336_remove(struct platform_device *pdev) static const struct platform_device_id board_ids[] = { { + .name = "sof-essx8336", /* default quirk == 0 */ + }, + { .name = "adl_es83x6_c1_h02", .driver_data = (kernel_ulong_t)(SOF_ES8336_SSP_CODEC(1) | SOF_NO_OF_HDMI_CAPTURE_SSP(2) | @@ -786,5 +789,4 @@ module_platform_driver(sof_es8336_driver); MODULE_DESCRIPTION("ASoC Intel(R) SOF + ES8336 Machine driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:sof-essx8336"); MODULE_IMPORT_NS(SND_SOC_INTEL_HDA_DSP_COMMON); diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c index 0d0594a0e4f6..7ace0c0db5b1 100644 --- a/sound/soc/sh/rz-ssi.c +++ b/sound/soc/sh/rz-ssi.c @@ -1017,32 +1017,36 @@ static int rz_ssi_probe(struct platform_device *pdev) ssi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(ssi->rstc)) { - rz_ssi_release_dma_channels(ssi); - return PTR_ERR(ssi->rstc); + ret = PTR_ERR(ssi->rstc); + goto err_reset; } reset_control_deassert(ssi->rstc); pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { - rz_ssi_release_dma_channels(ssi); - pm_runtime_disable(ssi->dev); - reset_control_assert(ssi->rstc); - return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); + dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n"); + goto err_pm; } ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component, rz_ssi_soc_dai, ARRAY_SIZE(rz_ssi_soc_dai)); if (ret < 0) { - rz_ssi_release_dma_channels(ssi); - - pm_runtime_put(ssi->dev); - pm_runtime_disable(ssi->dev); - reset_control_assert(ssi->rstc); dev_err(&pdev->dev, "failed to register snd component\n"); + goto err_snd_soc; } + return 0; + +err_snd_soc: + pm_runtime_put(ssi->dev); +err_pm: + pm_runtime_disable(ssi->dev); + reset_control_assert(ssi->rstc); +err_reset: + rz_ssi_release_dma_channels(ssi); + return ret; } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 5b99bf2dbd08..4f60c0a83311 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1317,6 +1317,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card, if (!be->dai_link->no_pcm) continue; + if (!snd_soc_dpcm_get_substream(be, stream)) + continue; + for_each_rtd_dais(be, i, dai) { w = snd_soc_dai_get_widget(dai, stream); diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index c5d797e97c02..d9a3ce7b69e1 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -252,9 +252,9 @@ static int memory_info_update(struct snd_sof_dev *sdev, char *buf, size_t buff_s } for (i = 0, len = 0; i < reply->num_elems; i++) { - ret = snprintf(buf + len, buff_size - len, "zone %d.%d used %#8x free %#8x\n", - reply->elems[i].zone, reply->elems[i].id, - reply->elems[i].used, reply->elems[i].free); + ret = scnprintf(buf + len, buff_size - len, "zone %d.%d used %#8x free %#8x\n", + reply->elems[i].zone, reply->elems[i].id, + reply->elems[i].used, reply->elems[i].free); if (ret < 0) goto error; len += ret; diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 8639ea63a10d..6d4ecbe14adf 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -574,7 +574,7 @@ static void hda_dsp_dump_ext_rom_status(struct snd_sof_dev *sdev, const char *le chip = get_chip_info(sdev->pdata); for (i = 0; i < HDA_EXT_ROM_STATUS_SIZE; i++) { value = snd_sof_dsp_read(sdev, HDA_DSP_BAR, chip->rom_status_reg + i * 0x4); - len += snprintf(msg + len, sizeof(msg) - len, " 0x%x", value); + len += scnprintf(msg + len, sizeof(msg) - len, " 0x%x", value); } dev_printk(level, sdev->dev, "extended rom status: %s", msg); diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index b2cc046b9f60..65923e7a5976 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2338,7 +2338,7 @@ static int sof_ipc3_parse_manifest(struct snd_soc_component *scomp, int index, } dev_info(scomp->dev, - "Topology: ABI %d:%d:%d Kernel ABI %hhu:%hhu:%hhu\n", + "Topology: ABI %d:%d:%d Kernel ABI %d:%d:%d\n", man->priv.data[0], man->priv.data[1], man->priv.data[2], SOF_ABI_MAJOR, SOF_ABI_MINOR, SOF_ABI_PATCH); diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 7a6b14874d65..a73cf01a1606 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_CRYPTO 2 #define KVM_S390_VM_CPU_MODEL 3 #define KVM_S390_VM_MIGRATION 4 +#define KVM_S390_VM_CPU_TOPOLOGY 5 /* kvm attributes for mem_ctrl */ #define KVM_S390_VM_MEM_ENABLE_CMMA 0 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 8323ac5b7eee..235dc85c91c3 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -219,7 +219,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */ +#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -303,7 +303,7 @@ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -354,6 +354,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ @@ -457,5 +458,6 @@ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e057e039173c..6674bdb096f3 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -235,6 +235,12 @@ #define PERF_CAP_PT_IDX 16 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define PERF_CAP_PEBS_TRAP BIT_ULL(6) +#define PERF_CAP_ARCH_REG BIT_ULL(7) +#define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_PEBS_BASELINE BIT_ULL(14) +#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) @@ -392,6 +398,7 @@ #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C #define MSR_PLATFORM_ENERGY_STATUS 0x0000064D +#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650 #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 @@ -1022,6 +1029,7 @@ #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index fee7983a90b4..11ff975242ca 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,6 @@ #ifndef _TOOLS_LINUX_ASM_X86_RMWcc #define _TOOLS_LINUX_ASM_X86_RMWcc -#ifdef CONFIG_CC_HAS_ASM_GOTO - #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ @@ -20,23 +18,4 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CONFIG_CC_HAS_ASM_GOTO */ - -#define __GEN_RMWcc(fullop, var, cc, ...) \ -do { \ - char c; \ - asm volatile (fullop "; set" cc " %1" \ - : "+m" (var), "=qm" (c) \ - : __VA_ARGS__ : "memory"); \ - return c != 0; \ -} while (0) - -#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc) - -#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ - __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) - -#endif /* CONFIG_CC_HAS_ASM_GOTO */ - #endif /* _TOOLS_LINUX_ASM_X86_RMWcc */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index ec53c9fa1da9..46de10a809ec 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -306,7 +306,8 @@ struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; -#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002 struct kvm_pit_state2 { struct kvm_pit_channel_state channels[3]; @@ -325,6 +326,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -359,7 +361,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; @@ -434,6 +439,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) +#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 946d761adbd3..a5faf6d88f1b 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -91,6 +91,7 @@ #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 +#define EXIT_REASON_NOTIFY 75 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -153,7 +154,8 @@ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ - { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ + { EXIT_REASON_NOTIFY, "NOTIFY" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index b28ff5d88145..520ad2691a99 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -751,14 +751,27 @@ typedef struct drm_i915_irq_wait { /* Must be kept compact -- no holes and well documented */ -typedef struct drm_i915_getparam { +/** + * struct drm_i915_getparam - Driver parameter query structure. + */ +struct drm_i915_getparam { + /** @param: Driver parameter to query. */ __s32 param; - /* + + /** + * @value: Address of memory where queried value should be put. + * * WARNING: Using pointers instead of fixed-size u64 means we need to write * compat32 code. Don't repeat this mistake. */ int __user *value; -} drm_i915_getparam_t; +}; + +/** + * typedef drm_i915_getparam_t - Driver parameter query structure. + * See struct drm_i915_getparam. + */ +typedef struct drm_i915_getparam drm_i915_getparam_t; /* Ioctl to set kernel params: */ @@ -1239,76 +1252,119 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +/** + * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf + * ioctl. + * + * The request will wait for input fence to signal before submission. + * + * The returned output fence will be signaled after the completion of the + * request. + */ struct drm_i915_gem_exec_fence { - /** - * User's handle for a drm_syncobj to wait on or signal. - */ + /** @handle: User's handle for a drm_syncobj to wait on or signal. */ __u32 handle; + /** + * @flags: Supported flags are: + * + * I915_EXEC_FENCE_WAIT: + * Wait for the input fence before request submission. + * + * I915_EXEC_FENCE_SIGNAL: + * Return request completion fence as output + */ + __u32 flags; #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) #define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) - __u32 flags; }; -/* - * See drm_i915_gem_execbuffer_ext_timeline_fences. - */ -#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 - -/* +/** + * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences + * for execbuf ioctl. + * * This structure describes an array of drm_syncobj and associated points for * timeline variants of drm_syncobj. It is invalid to append this structure to * the execbuf if I915_EXEC_FENCE_ARRAY is set. */ struct drm_i915_gem_execbuffer_ext_timeline_fences { +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + /** @base: Extension link. See struct i915_user_extension. */ struct i915_user_extension base; /** - * Number of element in the handles_ptr & value_ptr arrays. + * @fence_count: Number of elements in the @handles_ptr & @value_ptr + * arrays. */ __u64 fence_count; /** - * Pointer to an array of struct drm_i915_gem_exec_fence of length - * fence_count. + * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence + * of length @fence_count. */ __u64 handles_ptr; /** - * Pointer to an array of u64 values of length fence_count. Values - * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline - * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + * @values_ptr: Pointer to an array of u64 values of length + * @fence_count. + * Values must be 0 for a binary drm_syncobj. A Value of 0 for a + * timeline drm_syncobj is invalid as it turns a drm_syncobj into a + * binary one. */ __u64 values_ptr; }; +/** + * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2 + * ioctl. + */ struct drm_i915_gem_execbuffer2 { - /** - * List of gem_exec_object2 structs - */ + /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */ __u64 buffers_ptr; + + /** @buffer_count: Number of elements in @buffers_ptr array */ __u32 buffer_count; - /** Offset in the batchbuffer to start execution from. */ + /** + * @batch_start_offset: Offset in the batchbuffer to start execution + * from. + */ __u32 batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ + + /** + * @batch_len: Length in bytes of the batch buffer, starting from the + * @batch_start_offset. If 0, length is assumed to be the batch buffer + * object size. + */ __u32 batch_len; + + /** @DR1: deprecated */ __u32 DR1; + + /** @DR4: deprecated */ __u32 DR4; + + /** @num_cliprects: See @cliprects_ptr */ __u32 num_cliprects; + /** - * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * & I915_EXEC_USE_EXTENSIONS are not set. + * @cliprects_ptr: Kernel clipping was a DRI1 misfeature. + * + * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or + * I915_EXEC_USE_EXTENSIONS flags are not set. * * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array - * of struct drm_i915_gem_exec_fence and num_cliprects is the length - * of the array. + * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the + * array. * * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a - * single struct i915_user_extension and num_cliprects is 0. + * single &i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; + + /** @flags: Execbuf flags */ + __u64 flags; #define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) @@ -1326,10 +1382,6 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ #define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ - __u64 flags; - __u64 rsvd1; /* now used for context info */ - __u64 rsvd2; -}; /** Resets the SO write offset registers for transform feedback on gen7. */ #define I915_EXEC_GEN7_SOL_RESET (1<<8) @@ -1432,9 +1484,23 @@ struct drm_i915_gem_execbuffer2 { * drm_i915_gem_execbuffer_ext enum. */ #define I915_EXEC_USE_EXTENSIONS (1 << 21) - #define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) + /** @rsvd1: Context id */ + __u64 rsvd1; + + /** + * @rsvd2: in and out sync_file file descriptors. + * + * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the + * lower 32 bits of this field will have the in sync_file fd (input). + * + * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this + * field will have the out sync_file fd (output). + */ + __u64 rsvd2; +}; + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK @@ -1814,19 +1880,58 @@ struct drm_i915_gem_context_create { __u32 pad; }; +/** + * struct drm_i915_gem_context_create_ext - Structure for creating contexts. + */ struct drm_i915_gem_context_create_ext { - __u32 ctx_id; /* output: id of new context*/ + /** @ctx_id: Id of the created context (output) */ + __u32 ctx_id; + + /** + * @flags: Supported flags are: + * + * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS: + * + * Extensions may be appended to this structure and driver must check + * for those. See @extensions. + * + * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE + * + * Created context will have single timeline. + */ __u32 flags; #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) + + /** + * @extensions: Zero-terminated chain of extensions. + * + * I915_CONTEXT_CREATE_EXT_SETPARAM: + * Context parameter to set or query during context creation. + * See struct drm_i915_gem_context_create_ext_setparam. + * + * I915_CONTEXT_CREATE_EXT_CLONE: + * This extension has been removed. On the off chance someone somewhere + * has attempted to use it, never re-use this extension number. + */ __u64 extensions; +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 +#define I915_CONTEXT_CREATE_EXT_CLONE 1 }; +/** + * struct drm_i915_gem_context_param - Context parameter to set or query. + */ struct drm_i915_gem_context_param { + /** @ctx_id: Context id */ __u32 ctx_id; + + /** @size: Size of the parameter @value */ __u32 size; + + /** @param: Parameter to set or query */ __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 /* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance @@ -1973,6 +2078,7 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ + /** @value: Context parameter value to be set or queried */ __u64 value; }; @@ -2371,23 +2477,29 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * struct drm_i915_gem_context_create_ext_setparam - Context parameter + * to set or query during context creation. + */ struct drm_i915_gem_context_create_ext_setparam { -#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 + /** @base: Extension link. See struct i915_user_extension. */ struct i915_user_extension base; + + /** + * @param: Context parameter to set or query. + * See struct drm_i915_gem_context_param. + */ struct drm_i915_gem_context_param param; }; -/* This API has been removed. On the off chance someone somewhere has - * attempted to use it, never re-use this extension number. - */ -#define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct drm_i915_gem_context_destroy { __u32 ctx_id; __u32 pad; }; -/* +/** + * struct drm_i915_gem_vm_control - Structure to create or destroy VM. + * * DRM_I915_GEM_VM_CREATE - * * Create a new virtual memory address space (ppGTT) for use within a context @@ -2397,20 +2509,23 @@ struct drm_i915_gem_context_destroy { * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is * returned in the outparam @id. * - * No flags are defined, with all bits reserved and must be zero. - * * An extension chain maybe provided, starting with @extensions, and terminated * by the @next_extension being 0. Currently, no extensions are defined. * * DRM_I915_GEM_VM_DESTROY - * - * Destroys a previously created VM id, specified in @id. + * Destroys a previously created VM id, specified in @vm_id. * * No extensions or flags are allowed currently, and so must be zero. */ struct drm_i915_gem_vm_control { + /** @extensions: Zero-terminated chain of extensions. */ __u64 extensions; + + /** @flags: reserved for future usage, currently MBZ */ __u32 flags; + + /** @vm_id: Id of the VM created or to be destroyed */ __u32 vm_id; }; @@ -3207,36 +3322,6 @@ struct drm_i915_gem_memory_class_instance { * struct drm_i915_memory_region_info - Describes one region as known to the * driver. * - * Note that we reserve some stuff here for potential future work. As an example - * we might want expose the capabilities for a given region, which could include - * things like if the region is CPU mappable/accessible, what are the supported - * mapping types etc. - * - * Note that to extend struct drm_i915_memory_region_info and struct - * drm_i915_query_memory_regions in the future the plan is to do the following: - * - * .. code-block:: C - * - * struct drm_i915_memory_region_info { - * struct drm_i915_gem_memory_class_instance region; - * union { - * __u32 rsvd0; - * __u32 new_thing1; - * }; - * ... - * union { - * __u64 rsvd1[8]; - * struct { - * __u64 new_thing2; - * __u64 new_thing3; - * ... - * }; - * }; - * }; - * - * With this things should remain source compatible between versions for - * userspace, even as we add new fields. - * * Note this is using both struct drm_i915_query_item and struct drm_i915_query. * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS * at &drm_i915_query_item.query_id. @@ -3248,14 +3333,81 @@ struct drm_i915_memory_region_info { /** @rsvd0: MBZ */ __u32 rsvd0; - /** @probed_size: Memory probed by the driver (-1 = unknown) */ + /** + * @probed_size: Memory probed by the driver + * + * Note that it should not be possible to ever encounter a zero value + * here, also note that no current region type will ever return -1 here. + * Although for future region types, this might be a possibility. The + * same applies to the other size fields. + */ __u64 probed_size; - /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */ + /** + * @unallocated_size: Estimate of memory remaining + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting. + * Without this (or if this is an older kernel) the value here will + * always equal the @probed_size. Note this is only currently tracked + * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here + * will always equal the @probed_size). + */ __u64 unallocated_size; - /** @rsvd1: MBZ */ - __u64 rsvd1[8]; + union { + /** @rsvd1: MBZ */ + __u64 rsvd1[8]; + struct { + /** + * @probed_cpu_visible_size: Memory probed by the driver + * that is CPU accessible. + * + * This will be always be <= @probed_size, and the + * remainder (if there is any) will not be CPU + * accessible. + * + * On systems without small BAR, the @probed_size will + * always equal the @probed_cpu_visible_size, since all + * of it will be CPU accessible. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the @probed_size). + * + * Note that if the value returned here is zero, then + * this must be an old kernel which lacks the relevant + * small-bar uAPI support (including + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on + * such systems we should never actually end up with a + * small BAR configuration, assuming we are able to load + * the kernel module. Hence it should be safe to treat + * this the same as when @probed_cpu_visible_size == + * @probed_size. + */ + __u64 probed_cpu_visible_size; + + /** + * @unallocated_cpu_visible_size: Estimate of CPU + * visible memory remaining. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the + * @probed_cpu_visible_size). + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. Without this the value here will always + * equal the @probed_cpu_visible_size. Note this is only + * currently tracked for I915_MEMORY_CLASS_DEVICE + * regions (for other types the value here will also + * always equal the @probed_cpu_visible_size). + * + * If this is an older kernel the value here will be + * zero, see also @probed_cpu_visible_size. + */ + __u64 unallocated_cpu_visible_size; + }; + }; }; /** @@ -3329,11 +3481,11 @@ struct drm_i915_query_memory_regions { * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added * extension support using struct i915_user_extension. * - * Note that in the future we want to have our buffer flags here, at least for - * the stuff that is immutable. Previously we would have two ioctls, one to - * create the object with gem_create, and another to apply various parameters, - * however this creates some ambiguity for the params which are considered - * immutable. Also in general we're phasing out the various SET/GET ioctls. + * Note that new buffer flags should be added here, at least for the stuff that + * is immutable. Previously we would have two ioctls, one to create the object + * with gem_create, and another to apply various parameters, however this + * creates some ambiguity for the params which are considered immutable. Also in + * general we're phasing out the various SET/GET ioctls. */ struct drm_i915_gem_create_ext { /** @@ -3341,7 +3493,6 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * - * * DG2 64K min page size implications: * * On discrete platforms, starting from DG2, we have to contend with GTT @@ -3353,7 +3504,9 @@ struct drm_i915_gem_create_ext { * * Note that the returned size here will always reflect any required * rounding up done by the kernel, i.e 4K will now become 64K on devices - * such as DG2. + * such as DG2. The kernel will always select the largest minimum + * page-size for the set of possible placements as the value to use when + * rounding up the @size. * * Special DG2 GTT address alignment requirement: * @@ -3377,14 +3530,58 @@ struct drm_i915_gem_create_ext { * is deemed to be a good compromise. */ __u64 size; + /** * @handle: Returned handle for the object. * * Object handles are nonzero. */ __u32 handle; - /** @flags: MBZ */ + + /** + * @flags: Optional flags. + * + * Supported values: + * + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that + * the object will need to be accessed via the CPU. + * + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only + * strictly required on configurations where some subset of the device + * memory is directly visible/mappable through the CPU (which we also + * call small BAR), like on some DG2+ systems. Note that this is quite + * undesirable, but due to various factors like the client CPU, BIOS etc + * it's something we can expect to see in the wild. See + * &drm_i915_memory_region_info.probed_cpu_visible_size for how to + * determine if this system applies. + * + * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to + * ensure the kernel can always spill the allocation to system memory, + * if the object can't be allocated in the mappable part of + * I915_MEMORY_CLASS_DEVICE. + * + * Also note that since the kernel only supports flat-CCS on objects + * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore + * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with + * flat-CCS. + * + * Without this hint, the kernel will assume that non-mappable + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the + * kernel can still migrate the object to the mappable part, as a last + * resort, if userspace ever CPU faults this object, but this might be + * expensive, and so ideally should be avoided. + * + * On older kernels which lack the relevant small-bar uAPI support (see + * also &drm_i915_memory_region_info.probed_cpu_visible_size), + * usage of the flag will result in an error, but it should NEVER be + * possible to end up with a small BAR configuration, assuming we can + * also successfully load the i915 kernel module. In such cases the + * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as + * such there are zero restrictions on where the object can be placed. + */ +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0) __u32 flags; + /** * @extensions: The chain of extensions to apply to this object. * @@ -3443,6 +3640,22 @@ struct drm_i915_gem_create_ext { * At which point we get the object handle in &drm_i915_gem_create_ext.handle, * along with the final object size in &drm_i915_gem_create_ext.size, which * should account for any rounding up, if required. + * + * Note that userspace has no means of knowing the current backing region + * for objects where @num_regions is larger than one. The kernel will only + * ensure that the priority order of the @regions array is honoured, either + * when initially placing the object, or when moving memory around due to + * memory pressure + * + * On Flat-CCS capable HW, compression is supported for the objects residing + * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other + * memory class in @regions and migrated (by i915, due to memory + * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to + * decompress the content. But i915 doesn't have the required information to + * decompress the userspace compressed objects. + * + * So i915 supports Flat-CCS, on the objects which can reside only on + * I915_MEMORY_CLASS_DEVICE regions. */ struct drm_i915_gem_create_ext_memory_regions { /** @base: Extension link. See struct i915_user_extension. */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 9f4428be3e36..a756b29afcc2 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -27,7 +27,8 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ +#define FSCRYPT_MODE_AES_256_HCTR2 10 +/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index cb6e3846d27b..eed0315a77a6 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -270,6 +270,8 @@ struct kvm_xen_exit { #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 #define KVM_EXIT_RISCV_SBI 35 +#define KVM_EXIT_RISCV_CSR 36 +#define KVM_EXIT_NOTIFY 37 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -496,6 +498,18 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_RISCV_CSR */ + struct { + unsigned long csr_num; + unsigned long new_value; + unsigned long write_mask; + unsigned long ret_value; + } riscv_csr; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. */ char padding[256]; }; @@ -1157,6 +1171,12 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_TSC_CONTROL 214 #define KVM_CAP_SYSTEM_EVENT_DATA 215 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216 +#define KVM_CAP_S390_PROTECTED_DUMP 217 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 +#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 +#define KVM_CAP_S390_ZPCI_OP 221 +#define KVM_CAP_S390_CPU_TOPOLOGY 222 #ifdef KVM_CAP_IRQ_ROUTING @@ -1660,6 +1680,55 @@ struct kvm_s390_pv_unp { __u64 tweak; }; +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + enum pv_cmd_id { KVM_PV_ENABLE, KVM_PV_DISABLE, @@ -1668,6 +1737,8 @@ enum pv_cmd_id { KVM_PV_VERIFY, KVM_PV_PREP_RESET, KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, }; struct kvm_pv_cmd { @@ -2119,4 +2190,41 @@ struct kvm_stats_desc { /* Available with KVM_CAP_XSAVE2 */ #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +/* Available with KVM_CAP_S390_PROTECTED_DUMP */ +#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) + +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + +/* Available with KVM_CAP_S390_ZPCI_OP */ +#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index e2b77fbca91e..581ed4bdc062 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -301,6 +301,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -308,6 +309,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -317,8 +319,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index cab645d4a645..f9f115a7c75b 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -171,4 +171,13 @@ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ struct vhost_vring_state) +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + #endif diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 384d5e076ee4..6cd0be7c1bb4 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -309,7 +309,7 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return perf_cpu_map__idx(cpus, cpu) != -1; } -struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) +struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { struct perf_cpu result = { .cpu = -1 diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 952f3520d5c2..8ce5bbd09666 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -305,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel) if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (read_format & PERF_FORMAT_GROUP) { nr = evsel->nr_members; size += sizeof(u64); @@ -314,24 +317,98 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } +/* This only reads values for the leader */ +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx, + int thread, struct perf_counts_values *count) +{ + size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + u64 *data; + int idx = 1; + + if (fd == NULL || *fd < 0) + return -EINVAL; + + data = calloc(1, size); + if (data == NULL) + return -ENOMEM; + + if (readn(*fd, data, size) <= 0) { + free(data); + return -errno; + } + + /* + * This reads only the leader event intentionally since we don't have + * perf counts values for sibling events. + */ + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = data[idx++]; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = data[idx++]; + + /* value is always available */ + count->val = data[idx++]; + if (read_format & PERF_FORMAT_ID) + count->id = data[idx++]; + if (read_format & PERF_FORMAT_LOST) + count->lost = data[idx++]; + + free(data); + return 0; +} + +/* + * The perf read format is very flexible. It needs to set the proper + * values according to the read format. + */ +static void perf_evsel__adjust_values(struct perf_evsel *evsel, u64 *buf, + struct perf_counts_values *count) +{ + u64 read_format = evsel->attr.read_format; + int n = 0; + + count->val = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = buf[n++]; + + if (read_format & PERF_FORMAT_ID) + count->id = buf[n++]; + + if (read_format & PERF_FORMAT_LOST) + count->lost = buf[n++]; +} + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + struct perf_counts_values buf; memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; + if (read_format & PERF_FORMAT_GROUP) + return perf_evsel__read_group(evsel, cpu_map_idx, thread, count); + if (MMAP(evsel, cpu_map_idx, thread) && + !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) && !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; - if (readn(*fd, count->values, size) <= 0) + if (readn(*fd, buf.values, size) <= 0) return -errno; + perf_evsel__adjust_values(evsel, buf.values, count); return 0; } diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 24de795b09bb..03aceb72a783 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -23,7 +23,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 556bb06798f2..93bf93a59c99 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/limits.h> #include <linux/bpf.h> +#include <linux/compiler.h> #include <sys/types.h> /* pid_t */ #define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem)) @@ -76,7 +77,7 @@ struct perf_record_lost_samples { }; /* - * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID + * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST */ struct perf_record_read { struct perf_event_header header; @@ -85,6 +86,7 @@ struct perf_record_read { __u64 time_enabled; __u64 time_running; __u64 id; + __u64 lost; }; struct perf_record_throttle { @@ -153,22 +155,60 @@ enum { PERF_CPU_MAP__MASK = 1, }; +/* + * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[] + * and each entry is a value for a CPU in the map. + */ struct cpu_map_entries { __u16 nr; __u16 cpu[]; }; -struct perf_record_record_cpu_map { +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */ +struct perf_record_mask_cpu_map32 { + /* Number of mask values. */ + __u16 nr; + /* Constant 4. */ + __u16 long_size; + /* Bitmap data. */ + __u32 mask[]; +}; + +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */ +struct perf_record_mask_cpu_map64 { + /* Number of mask values. */ __u16 nr; + /* Constant 8. */ __u16 long_size; - unsigned long mask[]; + /* Legacy padding. */ + char __pad[4]; + /* Bitmap data. */ + __u64 mask[]; }; -struct perf_record_cpu_map_data { +/* + * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier + * version had unaligned data and we wish to retain file format compatibility. + * -irogers + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + +struct __packed perf_record_cpu_map_data { __u16 type; - char data[]; + union { + /* Used when type == PERF_CPU_MAP__CPUS. */ + struct cpu_map_entries cpus_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */ + struct perf_record_mask_cpu_map32 mask32_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */ + struct perf_record_mask_cpu_map64 mask64_data; + }; }; +#pragma GCC diagnostic pop + struct perf_record_cpu_map { struct perf_event_header header; struct perf_record_cpu_map_data data; diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 699c0ed97d34..6f92204075c2 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -18,8 +18,10 @@ struct perf_counts_values { uint64_t val; uint64_t ena; uint64_t run; + uint64_t id; + uint64_t lost; }; - uint64_t values[3]; + uint64_t values[5]; }; }; diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 89be89afb24d..a11fc51bfb68 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdarg.h> #include <stdio.h> +#include <string.h> #include <linux/perf_event.h> +#include <linux/kernel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> #include <perf/evsel.h> +#include <internal/evsel.h> #include <internal/tests.h> #include "tests.h" @@ -189,6 +192,163 @@ static int test_stat_user_read(int event) return 0; } +static int test_stat_read_format_single(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *evsel; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + evsel = perf_evsel__new(attr); + __T("failed to create evsel", evsel); + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(evsel, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(evsel, 0, 0, &counts); + + __T("failed to read value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read LOST", counts.lost == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + return 0; +} + +static int test_stat_read_format_group(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *leader, *member; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + attr->read_format |= PERF_FORMAT_GROUP; + leader = perf_evsel__new(attr); + __T("failed to create leader", leader); + + attr->read_format &= ~PERF_FORMAT_GROUP; + member = perf_evsel__new(attr); + __T("failed to create member", member); + + member->leader = leader; + leader->nr_members = 2; + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(leader, NULL, threads); + if (err < 0) + return 0; + err = perf_evsel__open(member, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(leader, 0, 0, &counts); + + __T("failed to read leader value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read leader TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read leader TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read leader ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read leader LOST", counts.lost == 0); + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(member, 0, 0, &counts); + + __T("failed to read member value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read member TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read member TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read member ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read member LOST", counts.lost == 0); + + perf_evsel__close(member); + perf_evsel__close(leader); + perf_evsel__delete(member); + perf_evsel__delete(leader); + return 0; +} + +static int test_stat_read_format(void) +{ + struct perf_thread_map *threads; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + }; + int err, i; + +#define FMT(_fmt) PERF_FORMAT_ ## _fmt +#define FMT_TIME (FMT(TOTAL_TIME_ENABLED) | FMT(TOTAL_TIME_RUNNING)) + + uint64_t test_formats [] = { + 0, + FMT_TIME, + FMT(ID), + FMT(LOST), + FMT_TIME | FMT(ID), + FMT_TIME | FMT(LOST), + FMT_TIME | FMT(ID) | FMT(LOST), + FMT(ID) | FMT(LOST), + }; + +#undef FMT +#undef FMT_TIME + + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing single read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_single(&attr, threads); + __T("failed to read single format", err == 0); + } + + perf_thread_map__put(threads); + + threads = perf_thread_map__new_array(2, NULL); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + perf_thread_map__set_pid(threads, 1, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing group read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_group(&attr, threads); + __T("failed to read group format", err == 0); + } + + perf_thread_map__put(threads); + return 0; +} + int test_evsel(int argc, char **argv) { __T_START; @@ -200,6 +360,7 @@ int test_evsel(int argc, char **argv) test_stat_thread_enable(); test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS); test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES); + test_stat_read_format(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0cec74da7ffe..91678252a9b6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4096,7 +4096,8 @@ static int validate_ibt(struct objtool_file *file) * These sections can reference text addresses, but not with * the intent to indirect branch to them. */ - if (!strncmp(sec->name, ".discard", 8) || + if ((!strncmp(sec->name, ".discard", 8) && + strcmp(sec->name, ".discard.ibt_endbr_noseal")) || !strncmp(sec->name, ".debug", 6) || !strcmp(sec->name, ".altinstructions") || !strcmp(sec->name, ".ibt_endbr_seal") || diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index f94929ebb54b..7ea150cdc137 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -17,21 +17,23 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, struct machine *machine __maybe_unused) { struct perf_record_cpu_map *map_event = &event->cpu_map; - struct perf_record_record_cpu_map *mask; struct perf_record_cpu_map_data *data; struct perf_cpu_map *map; int i; + unsigned int long_size; data = &map_event->data; TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK); - mask = (struct perf_record_record_cpu_map *)data->data; + long_size = data->mask32_data.long_size; - TEST_ASSERT_VAL("wrong nr", mask->nr == 1); + TEST_ASSERT_VAL("wrong long_size", long_size == 4 || long_size == 8); + + TEST_ASSERT_VAL("wrong nr", data->mask32_data.nr == 1); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask)); + TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(i, data)); } map = cpu_map__new_data(data); @@ -51,7 +53,6 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, struct machine *machine __maybe_unused) { struct perf_record_cpu_map *map_event = &event->cpu_map; - struct cpu_map_entries *cpus; struct perf_record_cpu_map_data *data; struct perf_cpu_map *map; @@ -59,11 +60,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS); - cpus = (struct cpu_map_entries *)data->data; - - TEST_ASSERT_VAL("wrong nr", cpus->nr == 2); - TEST_ASSERT_VAL("wrong cpu", cpus->cpu[0] == 1); - TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256); + TEST_ASSERT_VAL("wrong nr", data->cpus_data.nr == 2); + TEST_ASSERT_VAL("wrong cpu", data->cpus_data.cpu[0] == 1); + TEST_ASSERT_VAL("wrong cpu", data->cpus_data.cpu[1] == 256); map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", perf_cpu_map__nr(map) == 2); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 07f2411b0ad4..20930dd48ee0 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -86,10 +86,15 @@ static bool samples_same(const struct perf_sample *s1, COMP(read.time_running); /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ if (read_format & PERF_FORMAT_GROUP) { - for (i = 0; i < s1->read.group.nr; i++) - MCOMP(read.group.values[i]); + for (i = 0; i < s1->read.group.nr; i++) { + /* FIXME: check values without LOST */ + if (read_format & PERF_FORMAT_LOST) + MCOMP(read.group.values[i]); + } } else { COMP(read.one.id); + if (read_format & PERF_FORMAT_LOST) + COMP(read.one.lost); } } @@ -263,7 +268,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .data = (void *)aux_data, }, }; - struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; + struct sample_read_value values[] = {{1, 5, 0}, {9, 3, 0}, {2, 7, 0}, {6, 4, 1},}; struct perf_sample sample_out, sample_out_endian; size_t i, sz, bufsz; int err, ret = -1; @@ -286,6 +291,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) } else { sample.read.one.value = 0x08789faeb786aa87ULL; sample.read.one.id = 99; + sample.read.one.lost = 1; } sz = perf_event__sample_event_size(&sample, sample_type, read_format); @@ -370,7 +376,7 @@ out_free: */ static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; + const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 28, 29, 30, 31}; u64 sample_type; u64 sample_regs; size_t i; diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 17311ad9f9af..de3701a2a212 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -14,6 +14,8 @@ struct file; struct pid; struct cred; struct socket; +struct sock; +struct sk_buff; #define __sockaddr_check_size(size) \ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) @@ -69,6 +71,9 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ + struct ubuf_info *msg_ubuf; + int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); }; struct user_msghdr { @@ -416,10 +421,9 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); -extern int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs); +extern int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *umsg, + struct sockaddr __user **save_addr); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, @@ -428,10 +432,6 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); -extern int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile); extern struct file *do_accept(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 12b2243222b0..ae43fb88f444 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -22,54 +22,102 @@ static int max_node_num; */ static int *cpunode_map; -static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) +bool perf_record_cpu_map_data__test_bit(int i, + const struct perf_record_cpu_map_data *data) +{ + int bit_word32 = i / 32; + __u32 bit_mask32 = 1U << (i & 31); + int bit_word64 = i / 64; + __u64 bit_mask64 = ((__u64)1) << (i & 63); + + return (data->mask32_data.long_size == 4) + ? (bit_word32 < data->mask32_data.nr) && + (data->mask32_data.mask[bit_word32] & bit_mask32) != 0 + : (bit_word64 < data->mask64_data.nr) && + (data->mask64_data.mask[bit_word64] & bit_mask64) != 0; +} + +/* Read ith mask value from data into the given 64-bit sized bitmap */ +static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data, + int i, unsigned long *bitmap) +{ +#if __SIZEOF_LONG__ == 8 + if (data->mask32_data.long_size == 4) + bitmap[0] = data->mask32_data.mask[i]; + else + bitmap[0] = data->mask64_data.mask[i]; +#else + if (data->mask32_data.long_size == 4) { + bitmap[0] = data->mask32_data.mask[i]; + bitmap[1] = 0; + } else { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32); + bitmap[1] = (unsigned long)data->mask64_data.mask[i]; +#else + bitmap[0] = (unsigned long)data->mask64_data.mask[i]; + bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32); +#endif + } +#endif +} +static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data) { struct perf_cpu_map *map; - map = perf_cpu_map__empty_new(cpus->nr); + map = perf_cpu_map__empty_new(data->cpus_data.nr); if (map) { unsigned i; - for (i = 0; i < cpus->nr; i++) { + for (i = 0; i < data->cpus_data.nr; i++) { /* * Special treatment for -1, which is not real cpu number, * and we need to use (int) -1 to initialize map[i], * otherwise it would become 65535. */ - if (cpus->cpu[i] == (u16) -1) + if (data->cpus_data.cpu[i] == (u16) -1) map->map[i].cpu = -1; else - map->map[i].cpu = (int) cpus->cpu[i]; + map->map[i].cpu = (int) data->cpus_data.cpu[i]; } } return map; } -static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map *mask) +static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data) { + DECLARE_BITMAP(local_copy, 64); + int weight = 0, mask_nr = data->mask32_data.nr; struct perf_cpu_map *map; - int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE; - nr = bitmap_weight(mask->mask, nbits); + for (int i = 0; i < mask_nr; i++) { + perf_record_cpu_map_data__read_one_mask(data, i, local_copy); + weight += bitmap_weight(local_copy, 64); + } + + map = perf_cpu_map__empty_new(weight); + if (!map) + return NULL; - map = perf_cpu_map__empty_new(nr); - if (map) { - int cpu, i = 0; + for (int i = 0, j = 0; i < mask_nr; i++) { + int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE); + int cpu; - for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++].cpu = cpu; + perf_record_cpu_map_data__read_one_mask(data, i, local_copy); + for_each_set_bit(cpu, local_copy, 64) + map->map[j++].cpu = cpu + cpus_per_i; } return map; } -struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data) +struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data) { if (data->type == PERF_CPU_MAP__CPUS) - return cpu_map__from_entries((struct cpu_map_entries *)data->data); + return cpu_map__from_entries(data); else - return cpu_map__from_mask((struct perf_record_record_cpu_map *)data->data); + return cpu_map__from_mask(data); } size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 703ae6d3386e..fa8a5acdcae1 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -37,9 +37,11 @@ struct cpu_aggr_map { struct perf_record_cpu_map_data; +bool perf_record_cpu_map_data__test_bit(int i, const struct perf_record_cpu_map_data *data); + struct perf_cpu_map *perf_cpu_map__empty_new(int nr); -struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); +struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index a7b0931d5137..12eae6917022 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -65,7 +65,8 @@ struct stack_dump { struct sample_read_value { u64 value; - u64 id; + u64 id; /* only if PERF_FORMAT_ID */ + u64 lost; /* only if PERF_FORMAT_LOST */ }; struct sample_read { @@ -80,6 +81,24 @@ struct sample_read { }; }; +static inline size_t sample_read_value_size(u64 read_format) +{ + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_LOST) + return sizeof(struct sample_read_value); + else + return offsetof(struct sample_read_value, lost); +} + +static inline struct sample_read_value * +next_sample_read_value(struct sample_read_value *v, u64 read_format) +{ + return (void *)v + sample_read_value_size(read_format); +} + +#define sample_read_group__for_each(v, nr, rf) \ + for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) + struct ip_callchain { u64 nr; u64 ips[]; @@ -463,10 +482,6 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr); -void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max); -void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, - u16 type, int max); - void event_attr_init(struct perf_event_attr *attr); int perf_event_paranoid(void); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4852089e1d79..18c3eb864d55 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1541,7 +1541,7 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) } static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, - u64 val, u64 ena, u64 run) + u64 val, u64 ena, u64 run, u64 lost) { struct perf_counts_values *count; @@ -1550,6 +1550,7 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, count->val = val; count->ena = ena; count->run = run; + count->lost = lost; perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } @@ -1558,7 +1559,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; - u64 nr, ena = 0, run = 0, i; + u64 nr, ena = 0, run = 0, lost = 0; nr = *data++; @@ -1571,18 +1572,18 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) run = *data++; - v = (struct sample_read_value *) data; - - evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run); - - for (i = 1; i < nr; i++) { + v = (void *)data; + sample_read_group__for_each(v, nr, read_format) { struct evsel *counter; - counter = evlist__id2evsel(leader->evlist, v[i].id); + counter = evlist__id2evsel(leader->evlist, v->id); if (!counter) return -EINVAL; - evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run); + if (read_format & PERF_FORMAT_LOST) + lost = v->lost; + + evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost); } return 0; @@ -2475,8 +2476,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (data->read.group.nr > max_group_nr) return -EFAULT; - sz = data->read.group.nr * - sizeof(struct sample_read_value); + + sz = data->read.group.nr * sample_read_value_size(read_format); OVERFLOW_CHECK(array, sz, max_size); data->read.group.values = (struct sample_read_value *)array; @@ -2485,6 +2486,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK_u64(array); data->read.one.id = *array; array++; + + if (read_format & PERF_FORMAT_LOST) { + OVERFLOW_CHECK_u64(array); + data->read.one.lost = *array; + array++; + } } } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9ef2406e0ede..1f2040f36d4e 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -642,15 +642,19 @@ exit: return pylist; } -static PyObject *get_sample_value_as_tuple(struct sample_read_value *value) +static PyObject *get_sample_value_as_tuple(struct sample_read_value *value, + u64 read_format) { PyObject *t; - t = PyTuple_New(2); + t = PyTuple_New(3); if (!t) Py_FatalError("couldn't create Python tuple"); PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id)); PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value)); + if (read_format & PERF_FORMAT_LOST) + PyTuple_SetItem(t, 2, PyLong_FromUnsignedLongLong(value->lost)); + return t; } @@ -681,12 +685,17 @@ static void set_sample_read_in_dict(PyObject *dict_sample, Py_FatalError("couldn't create Python list"); if (read_format & PERF_FORMAT_GROUP) { - for (i = 0; i < sample->read.group.nr; i++) { - PyObject *t = get_sample_value_as_tuple(&sample->read.group.values[i]); + struct sample_read_value *v = sample->read.group.values; + + i = 0; + sample_read_group__for_each(v, sample->read.group.nr, read_format) { + PyObject *t = get_sample_value_as_tuple(v, read_format); PyList_SET_ITEM(values, i, t); + i++; } } else { - PyObject *t = get_sample_value_as_tuple(&sample->read.one); + PyObject *t = get_sample_value_as_tuple(&sample->read.one, + read_format); PyList_SET_ITEM(values, 0, t); } pydict_set_item_string_decref(dict_sample, "values", values); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 98e16659a149..192c9274f7ad 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -916,30 +916,30 @@ static void perf_event__cpu_map_swap(union perf_event *event, bool sample_id_all __maybe_unused) { struct perf_record_cpu_map_data *data = &event->cpu_map.data; - struct cpu_map_entries *cpus; - struct perf_record_record_cpu_map *mask; - unsigned i; data->type = bswap_16(data->type); switch (data->type) { case PERF_CPU_MAP__CPUS: - cpus = (struct cpu_map_entries *)data->data; - - cpus->nr = bswap_16(cpus->nr); + data->cpus_data.nr = bswap_16(data->cpus_data.nr); - for (i = 0; i < cpus->nr; i++) - cpus->cpu[i] = bswap_16(cpus->cpu[i]); + for (unsigned i = 0; i < data->cpus_data.nr; i++) + data->cpus_data.cpu[i] = bswap_16(data->cpus_data.cpu[i]); break; case PERF_CPU_MAP__MASK: - mask = (struct perf_record_record_cpu_map *)data->data; - - mask->nr = bswap_16(mask->nr); - mask->long_size = bswap_16(mask->long_size); + data->mask32_data.long_size = bswap_16(data->mask32_data.long_size); - switch (mask->long_size) { - case 4: mem_bswap_32(&mask->mask, mask->nr); break; - case 8: mem_bswap_64(&mask->mask, mask->nr); break; + switch (data->mask32_data.long_size) { + case 4: + data->mask32_data.nr = bswap_16(data->mask32_data.nr); + for (unsigned i = 0; i < data->mask32_data.nr; i++) + data->mask32_data.mask[i] = bswap_32(data->mask32_data.mask[i]); + break; + case 8: + data->mask64_data.nr = bswap_16(data->mask64_data.nr); + for (unsigned i = 0; i < data->mask64_data.nr; i++) + data->mask64_data.mask[i] = bswap_64(data->mask64_data.mask[i]); + break; default: pr_err("cpu_map swap: unsupported long size\n"); } @@ -1283,21 +1283,25 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.time_running); if (read_format & PERF_FORMAT_GROUP) { - u64 i; + struct sample_read_value *value = sample->read.group.values; printf(".... group nr %" PRIu64 "\n", sample->read.group.nr); - for (i = 0; i < sample->read.group.nr; i++) { - struct sample_read_value *value; - - value = &sample->read.group.values[i]; + sample_read_group__for_each(value, sample->read.group.nr, read_format) { printf("..... id %016" PRIx64 - ", value %016" PRIx64 "\n", + ", value %016" PRIx64, value->id, value->value); + if (read_format & PERF_FORMAT_LOST) + printf(", lost %" PRIu64, value->lost); + printf("\n"); } - } else - printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n", + } else { + printf("..... id %016" PRIx64 ", value %016" PRIx64, sample->read.one.id, sample->read.one.value); + if (read_format & PERF_FORMAT_LOST) + printf(", lost %" PRIu64, sample->read.one.lost); + printf("\n"); + } } static void dump_event(struct evlist *evlist, union perf_event *event, @@ -1411,6 +1415,9 @@ static void dump_read(struct evsel *evsel, union perf_event *event) if (read_format & PERF_FORMAT_ID) printf("... id : %" PRI_lu64 "\n", read_event->id); + + if (read_format & PERF_FORMAT_LOST) + printf("... lost : %" PRI_lu64 "\n", read_event->lost); } static struct machine *machines__find_for_cpumode(struct machines *machines, @@ -1479,14 +1486,14 @@ static int deliver_sample_group(struct evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, - struct machine *machine) + struct machine *machine, + u64 read_format) { int ret = -EINVAL; - u64 i; + struct sample_read_value *v = sample->read.group.values; - for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(evlist, tool, event, sample, - &sample->read.group.values[i], + sample_read_group__for_each(v, sample->read.group.nr, read_format) { + ret = deliver_sample_value(evlist, tool, event, sample, v, machine); if (ret) break; @@ -1510,7 +1517,7 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine); + machine, read_format); else return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2ae59c03ae77..812424dbf2d5 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1184,52 +1184,48 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, return err; } -static void synthesize_cpus(struct cpu_map_entries *cpus, - struct perf_cpu_map *map) +static void synthesize_cpus(struct perf_record_cpu_map_data *data, + const struct perf_cpu_map *map) { int i, map_nr = perf_cpu_map__nr(map); - cpus->nr = map_nr; + data->cpus_data.nr = map_nr; for (i = 0; i < map_nr; i++) - cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu; + data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu; } -static void synthesize_mask(struct perf_record_record_cpu_map *mask, - struct perf_cpu_map *map, int max) +static void synthesize_mask(struct perf_record_cpu_map_data *data, + const struct perf_cpu_map *map, int max) { - int i; + int idx; + struct perf_cpu cpu; + + /* Due to padding, the 4bytes per entry mask variant is always smaller. */ + data->mask32_data.nr = BITS_TO_U32(max); + data->mask32_data.long_size = 4; - mask->nr = BITS_TO_LONGS(max); - mask->long_size = sizeof(long); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + int bit_word = cpu.cpu / 32; + __u32 bit_mask = 1U << (cpu.cpu & 31); - for (i = 0; i < perf_cpu_map__nr(map); i++) - set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask); + data->mask32_data.mask[bit_word] |= bit_mask; + } } -static size_t cpus_size(struct perf_cpu_map *map) +static size_t cpus_size(const struct perf_cpu_map *map) { return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); } -static size_t mask_size(struct perf_cpu_map *map, int *max) +static size_t mask_size(const struct perf_cpu_map *map, int *max) { - int i; - - *max = 0; - - for (i = 0; i < perf_cpu_map__nr(map); i++) { - /* bit position of the cpu is + 1 */ - int bit = perf_cpu_map__cpu(map, i).cpu + 1; - - if (bit > *max) - *max = bit; - } - - return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long); + *max = perf_cpu_map__max(map).cpu; + return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32); } -void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max) +static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size, + u16 *type, int *max) { size_t size_cpus, size_mask; bool is_dummy = perf_cpu_map__empty(map); @@ -1258,30 +1254,31 @@ void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *type = PERF_CPU_MAP__MASK; } - *size += sizeof(struct perf_record_cpu_map_data); + *size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */ *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } -void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, - u16 type, int max) +static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, + const struct perf_cpu_map *map, + u16 type, int max) { data->type = type; switch (type) { case PERF_CPU_MAP__CPUS: - synthesize_cpus((struct cpu_map_entries *) data->data, map); + synthesize_cpus(data, map); break; case PERF_CPU_MAP__MASK: - synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); + synthesize_mask(data, map, max); default: break; } } -static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) +static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map) { - size_t size = sizeof(struct perf_record_cpu_map); + size_t size = sizeof(struct perf_event_header); struct perf_record_cpu_map *event; int max; u16 type; @@ -1299,7 +1296,7 @@ static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) } int perf_event__synthesize_cpu_map(struct perf_tool *tool, - struct perf_cpu_map *map, + const struct perf_cpu_map *map, perf_event__handler_t process, struct machine *machine) { @@ -1432,11 +1429,12 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, result += sizeof(u64); /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - result += sz; + sz = sample_read_value_size(read_format); + result += sz * sample->read.group.nr; } else { result += sizeof(u64); + if (read_format & PERF_FORMAT_LOST) + result += sizeof(u64); } } @@ -1521,6 +1519,20 @@ void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, *array = data->weight; } +static __u64 *copy_read_group_values(__u64 *array, __u64 read_format, + const struct perf_sample *sample) +{ + size_t sz = sample_read_value_size(read_format); + struct sample_read_value *v = sample->read.group.values; + + sample_read_group__for_each(v, sample->read.group.nr, read_format) { + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + memcpy(array, v, sz); + array = (void *)array + sz; + } + return array; +} + int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample) { @@ -1602,13 +1614,16 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - memcpy(array, sample->read.group.values, sz); - array = (void *)array + sz; + array = copy_read_group_values(array, read_format, + sample); } else { *array = sample->read.one.id; array++; + + if (read_format & PERF_FORMAT_LOST) { + *array = sample->read.one.lost; + array++; + } } } diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index 81cb3d6af0b9..53737d1619a4 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -46,7 +46,7 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *e int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index a6959df28eb0..02868ac3bc71 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -9,10 +9,13 @@ TEST_GEN_PROGS := $(src_test:.c=) TEST_GEN_PROGS_EXTENDED := true OVERRIDE_TARGETS := 1 +top_srcdir := ../../../.. include ../lib.mk +khdr_dir = $(top_srcdir)/usr/include + $(OUTPUT)/true: true.c $(LINK.c) $< $(LDLIBS) -o $@ -static -$(OUTPUT)/%_test: %_test.c ../kselftest_harness.h common.h - $(LINK.c) $< $(LDLIBS) -o $@ -lcap +$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h + $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index d4ffebb989f8..7060bae04ec8 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -14,13 +14,17 @@ # nft_flowtable.sh -o8000 -l1500 -r2000 # +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsr1="nsr1-$sfx" +nsr2="nsr2-$sfx" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 ret=0 -ns1in="" -ns2in="" +nsin="" ns1out="" ns2out="" @@ -36,21 +40,19 @@ checktool (){ checktool "nft --version" "run test without nft tool" checktool "ip -Version" "run test without ip tool" checktool "which nc" "run test without nc (netcat)" -checktool "ip netns add nsr1" "create net namespace" +checktool "ip netns add $nsr1" "create net namespace $nsr1" -ip netns add ns1 -ip netns add ns2 - -ip netns add nsr2 +ip netns add $ns1 +ip netns add $ns2 +ip netns add $nsr2 cleanup() { - for i in 1 2; do - ip netns del ns$i - ip netns del nsr$i - done + ip netns del $ns1 + ip netns del $ns2 + ip netns del $nsr1 + ip netns del $nsr2 - rm -f "$ns1in" "$ns1out" - rm -f "$ns2in" "$ns2out" + rm -f "$nsin" "$ns1out" "$ns2out" [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns } @@ -59,22 +61,21 @@ trap cleanup EXIT sysctl -q net.netfilter.nf_log_all_netns=1 -ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 -ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 +ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 +ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2 -ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 +ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2 for dev in lo veth0 veth1; do - for i in 1 2; do - ip -net nsr$i link set $dev up - done + ip -net $nsr1 link set $dev up + ip -net $nsr2 link set $dev up done -ip -net nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0 -ip -net nsr2 addr add 10.0.2.1/24 dev veth1 -ip -net nsr2 addr add dead:2::1/64 dev veth1 +ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 +ip -net $nsr2 addr add dead:2::1/64 dev veth1 # set different MTUs so we need to push packets coming from ns1 (large MTU) # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), @@ -106,85 +107,76 @@ do esac done -if ! ip -net nsr1 link set veth0 mtu $omtu; then +if ! ip -net $nsr1 link set veth0 mtu $omtu; then exit 1 fi -ip -net ns1 link set eth0 mtu $omtu +ip -net $ns1 link set eth0 mtu $omtu -if ! ip -net nsr2 link set veth1 mtu $rmtu; then +if ! ip -net $nsr2 link set veth1 mtu $rmtu; then exit 1 fi -ip -net ns2 link set eth0 mtu $rmtu +ip -net $ns2 link set eth0 mtu $rmtu # transfer-net between nsr1 and nsr2. # these addresses are not used for connections. -ip -net nsr1 addr add 192.168.10.1/24 dev veth1 -ip -net nsr1 addr add fee1:2::1/64 dev veth1 - -ip -net nsr2 addr add 192.168.10.2/24 dev veth0 -ip -net nsr2 addr add fee1:2::2/64 dev veth0 - -for i in 1 2; do - ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null - ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - - ip -net ns$i link set lo up - ip -net ns$i link set eth0 up - ip -net ns$i addr add 10.0.$i.99/24 dev eth0 - ip -net ns$i route add default via 10.0.$i.1 - ip -net ns$i addr add dead:$i::99/64 dev eth0 - ip -net ns$i route add default via dead:$i::1 - if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then +ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 +ip -net $nsr1 addr add fee1:2::1/64 dev veth1 + +ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 +ip -net $nsr2 addr add fee1:2::2/64 dev veth0 + +for i in 0 1; do + ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null +done + +for ns in $ns1 $ns2;do + ip -net $ns link set lo up + ip -net $ns link set eth0 up + + if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then echo "ERROR: Check Originator/Responder values (problem during address addition)" exit 1 fi - # don't set ip DF bit for first two tests - ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null + ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null done -ip -net nsr1 route add default via 192.168.10.2 -ip -net nsr2 route add default via 192.168.10.1 +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns2 addr add 10.0.2.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns2 addr add dead:2::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $ns2 route add default via dead:2::1 + +ip -net $nsr1 route add default via 192.168.10.2 +ip -net $nsr2 route add default via 192.168.10.1 -ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF table inet filter { flowtable f1 { hook ingress priority 0 devices = { veth0, veth1 } } + counter routed_orig { } + counter routed_repl { } + chain forward { type filter hook forward priority 0; policy drop; # flow offloaded? Tag ct with mark 1, so we can detect when it fails. - meta oif "veth1" tcp dport 12345 flow offload @f1 counter - - # use packet size to trigger 'should be offloaded by now'. - # otherwise, if 'flow offload' expression never offloads, the - # test will pass. - tcp dport 12345 meta length gt 200 ct mark set 1 counter + meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept - # this turns off flow offloading internally, so expect packets again - tcp flags fin,rst ct mark set 0 accept - - # this allows large packets from responder, we need this as long - # as PMTUd is off. - # This rule is deleted for the last test, when we expect PMTUd - # to kick in and ensure all packets meet mtu requirements. - meta length gt $lmtu accept comment something-to-grep-for - - # next line blocks connection w.o. working offload. - # we only do this for reverse dir, because we expect packets to - # enter slow path due to MTU mismatch of veth0 and veth1. - tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop + # count packets supposedly offloaded as per direction. + ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept ct state established,related accept - # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) - meta length lt 200 oif "veth1" tcp dport 12345 counter accept - meta nfproto ipv4 meta l4proto icmp accept meta nfproto ipv6 meta l4proto icmpv6 accept } @@ -197,30 +189,30 @@ if [ $? -ne 0 ]; then fi # test basic connectivity -if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then - echo "ERROR: ns1 cannot reach ns2" 1>&2 +if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach ns2" 1>&2 exit 1 fi -if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then - echo "ERROR: ns2 cannot reach ns1" 1>&2 +if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 exit 1 fi if [ $ret -eq 0 ];then - echo "PASS: netns routing/connectivity: ns1 can reach ns2" + echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" fi -ns1in=$(mktemp) +nsin=$(mktemp) ns1out=$(mktemp) -ns2in=$(mktemp) ns2out=$(mktemp) make_file() { name=$1 - SIZE=$((RANDOM % (1024 * 8))) + SIZE=$((RANDOM % (1024 * 128))) + SIZE=$((SIZE + (1024 * 8))) TSIZE=$((SIZE * 1024)) dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null @@ -231,6 +223,38 @@ make_file() dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null } +check_counters() +{ + local what=$1 + local ok=1 + + local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets) + local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets) + + local orig_cnt=${orig#*bytes} + local repl_cnt=${repl#*bytes} + + local fs=$(du -sb $nsin) + local max_orig=${fs%%/*} + local max_repl=$((max_orig/4)) + + if [ $orig_cnt -gt $max_orig ];then + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 + ret=1 + ok=0 + fi + + if [ $repl_cnt -gt $max_repl ];then + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 + ret=1 + ok=0 + fi + + if [ $ok -eq 1 ]; then + echo "PASS: $what" + fi +} + check_transfer() { in=$1 @@ -255,11 +279,11 @@ test_tcp_forwarding_ip() local dstport=$4 local lret=0 - ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & + ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" & lpid=$! sleep 1 - ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" & + ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" & cpid=$! sleep 3 @@ -274,11 +298,11 @@ test_tcp_forwarding_ip() wait - if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then + if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then lret=1 fi - if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then lret=1 fi @@ -295,41 +319,59 @@ test_tcp_forwarding() test_tcp_forwarding_nat() { local lret + local pmtu test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 lret=$? + pmtu=$3 + what=$4 + if [ $lret -eq 0 ] ; then + if [ $pmtu -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + else + echo "PASS: flow offload for ns1/ns2 with masquerade $what" + fi + test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 lret=$? + if [ $pmtu -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + elif [ $lret -eq 0 ] ; then + echo "PASS: flow offload for ns1/ns2 with dnat $what" + fi fi return $lret } -make_file "$ns1in" -make_file "$ns2in" +make_file "$nsin" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -if test_tcp_forwarding ns1 ns2; then +# Due to MTU mismatch in both directions, all packets (except small packets like pure +# acks) have to be handled by normal forwarding path. Therefore, packet counters +# are not checked. +if test_tcp_forwarding $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi # delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. -ip -net ns2 route del default via 10.0.2.1 -ip -net ns2 route del default via dead:2::1 -ip -net ns2 route add 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route del default via 10.0.2.1 +ip -net $ns2 route del default via dead:2::1 +ip -net $ns2 route add 192.168.10.1 via 10.0.2.1 # Second test: -# Same, but with NAT enabled. -ip netns exec nsr1 nft -f - <<EOF +# Same, but with NAT enabled. Same as in first test: we expect normal forward path +# to handle most packets. +ip netns exec $nsr1 nft -f - <<EOF table ip nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -343,47 +385,45 @@ table ip nat { } EOF -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with NAT" -else +if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi # Third test: -# Same as second test, but with PMTU discovery enabled. -handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) - -if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then - echo "FAIL: Could not delete large-packet accept rule" - exit 1 -fi - -ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null - -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" -else +# Same as second test, but with PMTU discovery enabled. This +# means that we expect the fastpath to handle packets as soon +# as the endpoints adjust the packet size. +ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null + +# reset counters. +# With pmtu in-place we'll also check that nft counters +# are lower than file size and packets were forwarded via flowtable layer. +# For earlier tests (large mtus), packets cannot be handled via flowtable +# (except pure acks and other small packets). +ip netns exec $nsr1 nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset fi # Another test: # Add bridge interface br0 to Router1, with NAT enabled. -ip -net nsr1 link add name br0 type bridge -ip -net nsr1 addr flush dev veth0 -ip -net nsr1 link set up dev veth0 -ip -net nsr1 link set veth0 master br0 -ip -net nsr1 addr add 10.0.1.1/24 dev br0 -ip -net nsr1 addr add dead:1::1/64 dev br0 -ip -net nsr1 link set up dev br0 +ip -net $nsr1 link add name br0 type bridge +ip -net $nsr1 addr flush dev veth0 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set veth0 master br0 +ip -net $nsr1 addr add 10.0.1.1/24 dev br0 +ip -net $nsr1 addr add dead:1::1/64 dev br0 +ip -net $nsr1 link set up dev br0 -ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null +ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null # br0 with NAT enabled. -ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF flush table ip nat table ip nat { chain prerouting { @@ -398,59 +438,56 @@ table ip nat { } EOF -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" -else +if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi + # Another test: # Add bridge interface br0 to Router1, with NAT and VLAN. -ip -net nsr1 link set veth0 nomaster -ip -net nsr1 link set down dev veth0 -ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 -ip -net nsr1 link set up dev veth0 -ip -net nsr1 link set up dev veth0.10 -ip -net nsr1 link set veth0.10 master br0 - -ip -net ns1 addr flush dev eth0 -ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 -ip -net ns1 link set eth0 up -ip -net ns1 link set eth0.10 up -ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 -ip -net ns1 route add default via 10.0.1.1 -ip -net ns1 addr add dead:1::99/64 dev eth0.10 - -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" -else +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set down dev veth0 +ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set up dev veth0.10 +ip -net $nsr1 link set veth0.10 master br0 + +ip -net $ns1 addr flush dev eth0 +ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 +ip -net $ns1 link set eth0 up +ip -net $ns1 link set eth0.10 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0.10 + +if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi # restore test topology (remove bridge and VLAN) -ip -net nsr1 link set veth0 nomaster -ip -net nsr1 link set veth0 down -ip -net nsr1 link set veth0.10 down -ip -net nsr1 link delete veth0.10 type vlan -ip -net nsr1 link delete br0 type bridge -ip -net ns1 addr flush dev eth0.10 -ip -net ns1 link set eth0.10 down -ip -net ns1 link set eth0 down -ip -net ns1 link delete eth0.10 type vlan +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set veth0 down +ip -net $nsr1 link set veth0.10 down +ip -net $nsr1 link delete veth0.10 type vlan +ip -net $nsr1 link delete br0 type bridge +ip -net $ns1 addr flush dev eth0.10 +ip -net $ns1 link set eth0.10 down +ip -net $ns1 link set eth0 down +ip -net $ns1 link delete eth0.10 type vlan # restore address in ns1 and nsr1 -ip -net ns1 link set eth0 up -ip -net ns1 addr add 10.0.1.99/24 dev eth0 -ip -net ns1 route add default via 10.0.1.1 -ip -net ns1 addr add dead:1::99/64 dev eth0 -ip -net ns1 route add default via dead:1::1 -ip -net nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net nsr1 addr add dead:1::1/64 dev veth0 -ip -net nsr1 link set up dev veth0 +ip -net $ns1 link set eth0 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 link set up dev veth0 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) @@ -480,23 +517,23 @@ do_esp() { } -do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -ip netns exec nsr1 nft delete table ip nat +ip netns exec $nsr1 nft delete table ip nat # restore default routes -ip -net ns2 route del 192.168.10.1 via 10.0.2.1 -ip -net ns2 route add default via 10.0.2.1 -ip -net ns2 route add default via dead:2::1 +ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns2 route add default via dead:2::1 -if test_tcp_forwarding ns1 ns2; then - echo "PASS: ipsec tunnel mode for ns1/ns2" +if test_tcp_forwarding $ns1 $ns2; then + check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" - ip netns exec nsr1 nft list ruleset 1>&2 - ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 + ip netns exec $nsr1 nft list ruleset 1>&2 + ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 fi exit $ret diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore new file mode 100644 index 000000000000..5710683da525 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore @@ -0,0 +1,20 @@ +blacklisted_events_test +event_alternatives_tests_p10 +event_alternatives_tests_p9 +generic_events_valid_test +group_constraint_cache_test +group_constraint_l2l3_sel_test +group_constraint_mmcra_sample_test +group_constraint_pmc56_test +group_constraint_pmc_count_test +group_constraint_radix_scope_qual_test +group_constraint_repeat_test +group_constraint_thresh_cmp_test +group_constraint_thresh_ctl_test +group_constraint_thresh_sel_test +group_constraint_unit_test +group_pmc56_exclude_constraints_test +hw_cache_event_type_test +invalid_event_code_test +reserved_bits_mmcra_sample_elig_mode_test +reserved_bits_mmcra_thresh_ctl_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 0fce5a694684..f93b4c7c3a8a 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1,11 +1,21 @@ -mmcr0_exceptionbits_test +bhrb_filter_map_test +bhrb_no_crash_wo_pmu_test +intr_regs_no_crash_wo_pmu_test mmcr0_cc56run_test -mmcr0_pmccext_test -mmcr0_pmcjce_test +mmcr0_exceptionbits_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test +mmcr0_pmccext_test +mmcr0_pmcjce_test mmcr1_comb_test -mmcr2_l2l3_test +mmcr1_sel_unit_cache_test mmcr2_fcs_fch_test +mmcr2_l2l3_test mmcr3_src_test +mmcra_bhrb_any_test +mmcra_bhrb_cond_test +mmcra_bhrb_disable_no_branch_test +mmcra_bhrb_disable_test +mmcra_bhrb_ind_call_test +mmcra_thresh_cmp_test mmcra_thresh_marked_sample_test diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 1bea2d16d4c1..22e28b76f800 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -30,8 +30,8 @@ WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_A TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) -CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) -LDFLAGS := -ggdb +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) +LDFLAGS := -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) @@ -61,40 +61,50 @@ endif LIBTRACEEVENT_MIN_VERSION = 1.5 LIBTRACEFS_MIN_VERSION = 1.3 +.PHONY: all warnings show_warnings +all: warnings rtla + TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 || echo n") ifeq ("$(TEST_LIBTRACEEVENT)", "n") -.PHONY: warning_traceevent -warning_traceevent: - @echo "********************************************" - @echo "** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found" - @echo "**" - @echo "** Consider installing the latest libtraceevent from your" - @echo "** distribution, e.g., 'dnf install libtraceevent' on Fedora," - @echo "** or from source:" - @echo "**" - @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ " - @echo "**" - @echo "********************************************" +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtraceevent-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "; endif TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 || echo n") ifeq ("$(TEST_LIBTRACEFS)", "n") -.PHONY: warning_tracefs -warning_tracefs: - @echo "********************************************" - @echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found" - @echo "**" - @echo "** Consider installing the latest libtracefs from your" - @echo "** distribution, e.g., 'dnf install libtracefs' on Fedora," - @echo "** or from source:" - @echo "**" - @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ " - @echo "**" - @echo "********************************************" +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtracefs-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "; endif -.PHONY: all -all: rtla +define show_dependencies + @echo "********************************************"; \ + echo "** NOTICE: Failed build dependencies"; \ + echo "**"; \ + echo "** Required Libraries:"; \ + $(MISSING_LIBS) \ + echo "**"; \ + echo "** Consider installing the latest libtracefs from your"; \ + echo "** distribution, e.g., 'dnf install $(MISSING_PACKAGES)' on Fedora,"; \ + echo "** or from source:"; \ + echo "**"; \ + $(MISSING_SOURCE) \ + echo "**"; \ + echo "********************************************" +endef + +show_warnings: + $(call show_dependencies); + +ifneq ("$(WARNINGS)", "") +ERROR_OUT = $(error Please add the necessary dependencies) + +warnings: $(WARNINGS) + $(ERROR_OUT) +endif rtla: $(OBJ) $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) @@ -108,9 +118,9 @@ install: doc_install $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) $(STRIP) $(DESTDIR)$(BINDIR)/rtla @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise - ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise + ln -s rtla $(DESTDIR)$(BINDIR)/osnoise @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat - ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat + ln -s rtla $(DESTDIR)$(BINDIR)/timerlat .PHONY: clean tarball clean: doc_clean diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index f3ec628f5e51..4b48af8a8309 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -892,7 +892,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; if (trace_is_off(&tool->trace, &record->trace)) { - printf("rtla timelat hit stop tracing\n"); + printf("rtla timerlat hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 35452a1d45e9..334271935222 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -687,7 +687,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; if (trace_is_off(&top->trace, &record->trace)) { - printf("rtla timelat hit stop tracing\n"); + printf("rtla timerlat hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 515dfe9d3bcf..584a5bab3af3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, /* * .change_pte() must be surrounded by .invalidate_range_{start,end}(). - * If mmu_notifier_count is zero, then no in-progress invalidations, - * including this one, found a relevant memslot at start(); rechecking - * memslots here is unnecessary. Note, a false positive (count elevated - * by a different invalidation) is sub-optimal but functionally ok. + * If mmu_invalidate_in_progress is zero, then no in-progress + * invalidations, including this one, found a relevant memslot at + * start(); rechecking memslots here is unnecessary. Note, a false + * positive (count elevated by a different invalidation) is sub-optimal + * but functionally ok. */ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - if (!READ_ONCE(kvm->mmu_notifier_count)) + if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); } -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * The count increase must become visible at unlock time as no * spte can be established without taking the mmu_lock and * count is also read inside the mmu_lock critical section. */ - kvm->mmu_notifier_count++; - if (likely(kvm->mmu_notifier_count == 1)) { - kvm->mmu_notifier_range_start = start; - kvm->mmu_notifier_range_end = end; + kvm->mmu_invalidate_in_progress++; + if (likely(kvm->mmu_invalidate_in_progress == 1)) { + kvm->mmu_invalidate_range_start = start; + kvm->mmu_invalidate_range_end = end; } else { /* * Fully tracking multiple concurrent ranges has diminishing @@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, * accumulate and persist until all outstanding invalidates * complete. */ - kvm->mmu_notifier_range_start = - min(kvm->mmu_notifier_range_start, start); - kvm->mmu_notifier_range_end = - max(kvm->mmu_notifier_range_end, end); + kvm->mmu_invalidate_range_start = + min(kvm->mmu_invalidate_range_start, start); + kvm->mmu_invalidate_range_end = + max(kvm->mmu_invalidate_range_end, end); } } @@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = kvm_unmap_gfn_range, - .on_lock = kvm_inc_notifier_count, + .on_lock = kvm_mmu_invalidate_begin, .on_unlock = kvm_arch_guest_memory_reclaimed, .flush_on_ret = true, .may_block = mmu_notifier_range_blockable(range), @@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, /* * Prevent memslot modification between range_start() and range_end() * so that conditionally locking provides the same result in both - * functions. Without that guarantee, the mmu_notifier_count + * functions. Without that guarantee, the mmu_invalidate_in_progress * adjustments will be imbalanced. * * Pairs with the decrement in range_end(). @@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * any given time, and the caches themselves can check for hva overlap, * i.e. don't need to rely on memslot overlap checks for performance. * Because this runs without holding mmu_lock, the pfn caches must use - * mn_active_invalidate_count (see above) instead of mmu_notifier_count. + * mn_active_invalidate_count (see above) instead of + * mmu_invalidate_in_progress. */ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, hva_range.may_block); @@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * This sequence increase will notify the kvm page fault that * the page that is going to be mapped in the spte could have * been freed. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; smp_wmb(); /* * The above sequence increase must be visible before the * below count decrease, which is ensured by the smp_wmb above - * in conjunction with the smp_rmb in mmu_notifier_retry(). + * in conjunction with the smp_rmb in mmu_invalidate_retry(). */ - kvm->mmu_notifier_count--; + kvm->mmu_invalidate_in_progress--; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = (void *)kvm_null_fn, - .on_lock = kvm_dec_notifier_count, + .on_lock = kvm_mmu_invalidate_end, .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = mmu_notifier_range_blockable(range), @@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, if (wake) rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); - BUG_ON(kvm->mmu_notifier_count < 0); + BUG_ON(kvm->mmu_invalidate_in_progress < 0); } static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, @@ -1134,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (!kvm) return ERR_PTR(-ENOMEM); + /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ + __module_get(kvm_chardev_ops.owner); + KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; @@ -1211,9 +1216,17 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_mmu_notifier; + r = kvm_coalesced_mmio_init(kvm); + if (r < 0) + goto out_no_coalesced_mmio; + + r = kvm_create_vm_debugfs(kvm, fdname); + if (r) + goto out_err_no_debugfs; + r = kvm_arch_post_init_vm(kvm); if (r) - goto out_err_mmu_notifier; + goto out_err; mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); @@ -1222,25 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) preempt_notifier_inc(); kvm_init_pm_notifier(kvm); - /* - * When the fd passed to this ioctl() is opened it pins the module, - * but try_module_get() also prevents getting a reference if the module - * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). - */ - if (!try_module_get(kvm_chardev_ops.owner)) { - r = -ENODEV; - goto out_err_mmu_notifier; - } - - r = kvm_create_vm_debugfs(kvm, fdname); - if (r) - goto out_err; - return kvm; out_err: - module_put(kvm_chardev_ops.owner); -out_err_mmu_notifier: + kvm_destroy_vm_debugfs(kvm); +out_err_no_debugfs: + kvm_coalesced_mmio_free(kvm); +out_no_coalesced_mmio: #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); @@ -1259,6 +1260,7 @@ out_err_no_irq_srcu: out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); + module_put(kvm_chardev_ops.owner); return ERR_PTR(r); } @@ -2516,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, { unsigned int flags = FOLL_HWPOISON; struct page *page; - int npages = 0; + int npages; might_sleep(); @@ -4378,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type) static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { - const struct kvm_device_ops *ops = NULL; + const struct kvm_device_ops *ops; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int type; @@ -4913,11 +4915,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) goto put_fd; } -#ifdef CONFIG_KVM_MMIO - r = kvm_coalesced_mmio_init(kvm); - if (r < 0) - goto put_kvm; -#endif file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { r = PTR_ERR(file); diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index ab519f72f2cd..68ff41d39545 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -112,27 +112,28 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s { /* * mn_active_invalidate_count acts for all intents and purposes - * like mmu_notifier_count here; but the latter cannot be used - * here because the invalidation of caches in the mmu_notifier - * event occurs _before_ mmu_notifier_count is elevated. + * like mmu_invalidate_in_progress here; but the latter cannot + * be used here because the invalidation of caches in the + * mmu_notifier event occurs _before_ mmu_invalidate_in_progress + * is elevated. * * Note, it does not matter that mn_active_invalidate_count * is not protected by gpc->lock. It is guaranteed to * be elevated before the mmu_notifier acquires gpc->lock, and - * isn't dropped until after mmu_notifier_seq is updated. + * isn't dropped until after mmu_invalidate_seq is updated. */ if (kvm->mn_active_invalidate_count) return true; /* * Ensure mn_active_invalidate_count is read before - * mmu_notifier_seq. This pairs with the smp_wmb() in + * mmu_invalidate_seq. This pairs with the smp_wmb() in * mmu_notifier_invalidate_range_end() to guarantee either the * old (non-zero) value of mn_active_invalidate_count or the - * new (incremented) value of mmu_notifier_seq is observed. + * new (incremented) value of mmu_invalidate_seq is observed. */ smp_rmb(); - return kvm->mmu_notifier_seq != mmu_seq; + return kvm->mmu_invalidate_seq != mmu_seq; } static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) @@ -155,7 +156,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) gpc->valid = false; do { - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); write_unlock_irq(&gpc->lock); |