diff options
45 files changed, 598 insertions, 420 deletions
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml index f6c3fcc4bdfd..b5af12011499 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml @@ -32,6 +32,11 @@ description: | | | vint | bit | | 0 |.....|63| vintx | | +--------------+ +------------+ | | | + | Unmap | + | +--------------+ | + Unmapped events ---->| | umapidx |-------------------------> Globalevents + | +--------------+ | + | | +-----------------------------------------+ Configuration of these Intmap registers that maps global events to vint is @@ -70,6 +75,11 @@ properties: - description: | "limit" specifies the limit for translation + ti,unmapped-event-sources: + $ref: /schemas/types.yaml#definitions/phandle-array + description: + Array of phandles to DMA controllers where the unmapped events originate. + required: - compatible - reg diff --git a/MAINTAINERS b/MAINTAINERS index cd123d0a6a2d..3da6d8c154e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11163,7 +11163,7 @@ F: Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt F: drivers/input/touchscreen/melfas_mip4.c MELLANOX BLUEFIELD I2C DRIVER -M: Khalil Blaiech <[email protected]> +M: Khalil Blaiech <[email protected]> S: Supported F: drivers/i2c/busses/i2c-mlxbf.c diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 85ed2390fb99..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), "Bug: fault blocked by AP register !"); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 1d9ac0f9c794..0bd1b144eb76 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -33,19 +33,18 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MI_APG_INIT 0x40000000 - -/* - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MI_APG_KUEP 0x60000000 + * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say + * "all User" rules, that will lead to NA for all. + * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED + * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) + * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) + * 2 => User => 11 (all accesses performed according as user iaw page definition) + * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT + * => 10 (all accesses performed according to swaped page definition) for KUEP + * 4-15 => Not Used + */ +#define MI_APG_INIT 0xdc000000 +#define MI_APG_KUEP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -106,25 +105,9 @@ #define MD_Ks 0x80000000 /* Should not be set */ #define MD_Kp 0x40000000 /* Should always be set */ -/* - * All pages' PP data bits are set to either 000 or 011 or 001, which means - * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user and NA for ALL. - * Then we use the APG to say whether accesses are according to Page rules or - * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MD_APG_INIT 0x40000000 - -/* - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MD_APG_KUAP 0x60000000 +/* See explanation above at the definition of MI_APG_INIT */ +#define MD_APG_INIT 0xdc000000 +#define MD_APG_KUAP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 66f403a7da44..1581204467e1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -39,9 +39,9 @@ * into the TLB. */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */ #define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */ -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_SPECIAL 0x0080 /* SW entry */ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ @@ -59,11 +59,12 @@ #define _PMD_PRESENT 0x0001 #define _PMD_PRESENT_MASK _PMD_PRESENT -#define _PMD_BAD 0x0fd0 +#define _PMD_BAD 0x0f90 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 -#define _PMD_USER 0x0020 /* APG 1 */ +#define _PMD_ACCESSED 0x0020 /* APG 1 */ +#define _PMD_USER 0x0040 /* APG 2 */ #define _PTE_NONE_MASK 0 diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8728590f514a..3beeb030cd78 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -6,6 +6,7 @@ struct device; struct device_node; +struct drmem_lmb; #ifdef CONFIG_NUMA @@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu) */ return (nid < 0) ? 0 : nid; } + +int of_drconf_to_nid_single(struct drmem_lmb *lmb); + #else static inline int early_cpu_to_node(int cpu) { return 0; } @@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) return 0; } -#endif /* CONFIG_NUMA */ +static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) +{ + return first_online_node; +} -struct drmem_lmb; -int of_drconf_to_nid_single(struct drmem_lmb *lmb); +#endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ef5bbb705c08..501c9a79038c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -178,7 +178,7 @@ do { \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -191,7 +191,7 @@ do { \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 6b50bf15d8c1..bf3270426d82 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) { struct pci_io_addr_range *piar; struct rb_node *n; + unsigned long flags; - spin_lock(&pci_io_addr_cache_root.piar_lock); + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); } - spin_unlock(&pci_io_addr_cache_root.piar_lock); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return 0; } diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 44c9018aed1b..a1ae00689e0f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r9, _PAGE_PRESENT -#endif andc. r9, r9, r11 /* Check permission */ bne 5f @@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r9, _PAGE_PRESENT | _PAGE_EXEC -#endif andc. r9, r9, r11 /* Check permission */ bne 5f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f359d3fba74..ee0bfebc375f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -202,9 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -224,25 +222,13 @@ InstructionTLBMiss: 3: mtcr r11 #endif -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 -#else - lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ - mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 -#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MI_TWC, r11 -#endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be @@ -256,9 +242,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -268,9 +252,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -297,30 +279,16 @@ DataStoreTLBMiss: mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ - /* Insert the Guarded flag into the TWC from the Linux PTE. + /* Insert Guarded and Accessed flags into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 - /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. - * We also need to know if the insn is a load/store, so: - * Clear _PAGE_PRESENT and load that which will - * trap into DTLB Error with store bit set accordinly. - */ - /* PRESENT=0x1, ACCESSED=0x20 - * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); - * r10 = (r10 & ~PRESENT) | r11; - */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -711,7 +679,7 @@ initial_mmu: li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ - li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ + li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID li r11, MI_BOOTINIT /* Create RPN for address 0 */ 1: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ @@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) li r0, 4 @@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb) LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb) #endif #ifdef CONFIG_PIN_TLB_IMMR LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) - LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED) mfspr r8, SPRN_IMMR rlwinm r8, r8, 0, 0xfff80000 ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..2aa16d5368e1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,11 +457,7 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -523,11 +519,7 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -603,11 +595,7 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3c6b9822f978..8c2857cbd960 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu) /* Activate a secondary processor. */ void start_secondary(void *unused) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu = raw_smp_processor_id(); mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); + rcu_cpu_starting(cpu); preempt_disable(); cpu_callin_map[cpu] = 1; diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index c47e6b35c551..824b2c9da75b 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -476,7 +476,7 @@ do { \ do { \ long __kr_err; \ \ - __put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 99e12faa5498..765b62434f30 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Linaro Limited * Author: AKASHI Takahiro <[email protected]> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 11e2a4fe66e0..7e849797c9c3 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -35,6 +35,10 @@ ENTRY(_start) .word 0 #endif .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ .dword 0x200000 @@ -42,6 +46,7 @@ ENTRY(_start) /* Image load offset(4MB) from start of RAM */ .dword 0x400000 #endif +#endif /* Effective size of kernel image */ .dword _end - _start .dword __HEAD_FLAGS diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 11ebee9e4c1d..3a19def868ec 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vdso.lds *.tmp +vdso-syms.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 7d6a94d45ec9..cb8f9e4cfcbf 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -43,19 +43,14 @@ $(obj)/vdso.o: $(obj)/vdso.so SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + -Wl,--build-id -Wl,--hash-style=both # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then # refer to these symbols in the kernel code rather than hand-coded addresses. - -SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id=sha1 -Wl,--hash-style=both -$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE - $(call if_changed,vdsold) - -LDFLAGS_vdso-syms.o := -r --just-symbols -$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE - $(call if_changed,ld) +$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE + $(call if_changed,so2s) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -73,6 +68,11 @@ quiet_cmd_vdsold = VDSOLD $@ $(patsubst %, -G __vdso_%, $(vdso-syms)) [email protected] $@ && \ +# Extracts symbol offsets from the VDSO, converting them into an assembly file +# that contains the same symbols at the same offsets. +quiet_cmd_so2s = SO2S $@ + cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ + # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ cmd_vdso_install = cp $(obj)/[email protected] $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh new file mode 100755 index 000000000000..e64cb6d9440e --- /dev/null +++ b/arch/riscv/kernel/vdso/so2s.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2020 Palmer Dabbelt <[email protected]> + +sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ +| grep '^\.' diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 1359e21c0c62..3c8b9e433c67 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -86,6 +86,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a pmd_t *pmd, *pmd_k; pte_t *pte_k; int index; + unsigned long pfn; /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) @@ -100,7 +101,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * of a task switch. */ index = pgd_index(addr); - pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; + pfn = csr_read(CSR_SATP) & SATP_PPN; + pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ea933b789a88..8e577f14f120 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -154,9 +154,8 @@ disable: void __init setup_bootmem(void) { - phys_addr_t mem_size = 0; - phys_addr_t total_mem = 0; - phys_addr_t mem_start, start, end = 0; + phys_addr_t mem_start = 0; + phys_addr_t start, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); u64 i; @@ -164,21 +163,18 @@ void __init setup_bootmem(void) /* Find the memory region containing the kernel */ for_each_mem_range(i, &start, &end) { phys_addr_t size = end - start; - if (!total_mem) + if (!mem_start) mem_start = start; if (start <= vmlinux_start && vmlinux_end <= end) BUG_ON(size == 0); - total_mem = total_mem + size; } /* - * Remove memblock from the end of usable area to the - * end of region + * The maximal physical memory size is -PAGE_OFFSET. + * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed + * as it is unusable by kernel. */ - mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET); - if (mem_start + mem_size < end) - memblock_remove(mem_start + mem_size, - end - mem_start - mem_size); + memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); @@ -297,6 +293,7 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; #define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) #endif pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { @@ -494,6 +491,18 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) load_pa + (va - PAGE_OFFSET), map_size, PAGE_KERNEL_EXEC); +#ifndef __PAGETABLE_PMD_FOLDED + /* Setup early PMD for DTB */ + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); + /* Create two consecutive PMD mappings for FDT early scan */ + pa = dtb_pa & ~(PMD_SIZE - 1); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + pa, PMD_SIZE, PAGE_KERNEL); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, @@ -501,6 +510,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); +#endif dtb_early_pa = dtb_pa; /* diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index cfd00ad40355..c24d9b5ad81a 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -47,7 +47,7 @@ struct nullb_device { unsigned int nr_zones_closed; struct blk_zone *zones; sector_t zone_size_sects; - spinlock_t zone_dev_lock; + spinlock_t zone_lock; unsigned long *zone_locks; unsigned long size; /* device size in MB */ diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 8775acbb4f8f..beb34b4f76b0 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -46,11 +46,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) if (!dev->zones) return -ENOMEM; - spin_lock_init(&dev->zone_dev_lock); - dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); - if (!dev->zone_locks) { - kvfree(dev->zones); - return -ENOMEM; + /* + * With memory backing, the zone_lock spinlock needs to be temporarily + * released to avoid scheduling in atomic context. To guarantee zone + * information protection, use a bitmap to lock zones with + * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing + * implies that the queue is marked with BLK_MQ_F_BLOCKING. + */ + spin_lock_init(&dev->zone_lock); + if (dev->memory_backed) { + dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); + if (!dev->zone_locks) { + kvfree(dev->zones); + return -ENOMEM; + } } if (dev->zone_nr_conv >= dev->nr_zones) { @@ -137,12 +146,17 @@ void null_free_zoned_dev(struct nullb_device *dev) static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) { - wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); + if (dev->memory_backed) + wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); + spin_lock_irq(&dev->zone_lock); } static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) { - clear_and_wake_up_bit(zno, dev->zone_locks); + spin_unlock_irq(&dev->zone_lock); + + if (dev->memory_backed) + clear_and_wake_up_bit(zno, dev->zone_locks); } int null_report_zones(struct gendisk *disk, sector_t sector, @@ -322,7 +336,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); null_lock_zone(dev, zno); - spin_lock(&dev->zone_dev_lock); switch (zone->cond) { case BLK_ZONE_COND_FULL: @@ -375,9 +388,17 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->cond != BLK_ZONE_COND_EXP_OPEN) zone->cond = BLK_ZONE_COND_IMP_OPEN; - spin_unlock(&dev->zone_dev_lock); + /* + * Memory backing allocation may sleep: release the zone_lock spinlock + * to avoid scheduling in atomic context. Zone operation atomicity is + * still guaranteed through the zone_locks bitmap. + */ + if (dev->memory_backed) + spin_unlock_irq(&dev->zone_lock); ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); - spin_lock(&dev->zone_dev_lock); + if (dev->memory_backed) + spin_lock_irq(&dev->zone_lock); + if (ret != BLK_STS_OK) goto unlock; @@ -392,7 +413,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, ret = BLK_STS_OK; unlock: - spin_unlock(&dev->zone_dev_lock); null_unlock_zone(dev, zno); return ret; @@ -516,9 +536,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, null_lock_zone(dev, i); zone = &dev->zones[i]; if (zone->cond != BLK_ZONE_COND_EMPTY) { - spin_lock(&dev->zone_dev_lock); null_reset_zone(dev, zone); - spin_unlock(&dev->zone_dev_lock); trace_nullb_zone_op(cmd, i, zone->cond); } null_unlock_zone(dev, i); @@ -530,7 +548,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, zone = &dev->zones[zone_no]; null_lock_zone(dev, zone_no); - spin_lock(&dev->zone_dev_lock); switch (op) { case REQ_OP_ZONE_RESET: @@ -550,8 +567,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, break; } - spin_unlock(&dev->zone_dev_lock); - if (ret == BLK_STS_OK) trace_nullb_zone_op(cmd, zone_no, zone->cond); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a4f473ef4e5c..a97a9d058198 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -733,7 +733,7 @@ config I2C_LPC2K config I2C_MLXBF tristate "Mellanox BlueField I2C controller" - depends on ARM64 + depends on MELLANOX_PLATFORM && ARM64 help Enabling this option will add I2C SMBus support for Mellanox BlueField system. diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 44974b53a626..0d15f4c1e9f7 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -159,7 +159,6 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) u32 raw_stat, stat, enabled, tmp; u8 val = 0, slave_activity; - regmap_read(dev->map, DW_IC_INTR_STAT, &stat); regmap_read(dev->map, DW_IC_ENABLE, &enabled); regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &raw_stat); regmap_read(dev->map, DW_IC_STATUS, &tmp); @@ -168,32 +167,30 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) return 0; + stat = i2c_dw_read_clear_intrbits_slave(dev); dev_dbg(dev->dev, "%#x STATUS SLAVE_ACTIVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n", enabled, slave_activity, raw_stat, stat); - if ((stat & DW_IC_INTR_RX_FULL) && (stat & DW_IC_INTR_STOP_DET)) - i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val); + if (stat & DW_IC_INTR_RX_FULL) { + if (dev->status != STATUS_WRITE_IN_PROGRESS) { + dev->status = STATUS_WRITE_IN_PROGRESS; + i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, + &val); + } + + regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); + val = tmp; + if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, + &val)) + dev_vdbg(dev->dev, "Byte %X acked!", val); + } if (stat & DW_IC_INTR_RD_REQ) { if (slave_activity) { - if (stat & DW_IC_INTR_RX_FULL) { - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - - if (!i2c_slave_event(dev->slave, - I2C_SLAVE_WRITE_RECEIVED, - &val)) { - dev_vdbg(dev->dev, "Byte %X acked!", - val); - } - regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - stat = i2c_dw_read_clear_intrbits_slave(dev); - } else { - regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - regmap_read(dev->map, DW_IC_CLR_RX_UNDER, &tmp); - stat = i2c_dw_read_clear_intrbits_slave(dev); - } + regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); + + dev->status = STATUS_READ_IN_PROGRESS; if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_REQUESTED, &val)) @@ -205,21 +202,11 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED, &val)) regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp); - - i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - stat = i2c_dw_read_clear_intrbits_slave(dev); - return 1; } - if (stat & DW_IC_INTR_RX_FULL) { - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, - &val)) - dev_vdbg(dev->dev, "Byte %X acked!", val); - } else { + if (stat & DW_IC_INTR_STOP_DET) { + dev->status = STATUS_IDLE; i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - stat = i2c_dw_read_clear_intrbits_slave(dev); } return 1; @@ -230,7 +217,6 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) struct dw_i2c_dev *dev = dev_id; int ret; - i2c_dw_read_clear_intrbits_slave(dev); ret = i2c_dw_irq_handler_slave(dev); if (ret > 0) complete(&dev->cmd_complete); diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c index ee59e0da082d..33574d40ea9c 100644 --- a/drivers/i2c/busses/i2c-mlxbf.c +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -62,10 +62,8 @@ * Master. Default value is set to 400MHz. */ #define MLXBF_I2C_TYU_PLL_OUT_FREQ (400 * 1000 * 1000) -/* Reference clock for Bluefield 1 - 156 MHz. */ -#define MLXBF_I2C_TYU_PLL_IN_FREQ (156 * 1000 * 1000) -/* Reference clock for BlueField 2 - 200 MHz. */ -#define MLXBF_I2C_YU_PLL_IN_FREQ (200 * 1000 * 1000) +/* Reference clock for Bluefield - 156 MHz. */ +#define MLXBF_I2C_PLL_IN_FREQ (156 * 1000 * 1000) /* Constant used to determine the PLL frequency. */ #define MLNXBF_I2C_COREPLL_CONST 16384 @@ -489,44 +487,6 @@ static struct mutex mlxbf_i2c_bus_lock; #define MLXBF_I2C_FREQUENCY_1GHZ 1000000000 -static void mlxbf_i2c_write(void __iomem *io, int reg, u32 val) -{ - writel(val, io + reg); -} - -static u32 mlxbf_i2c_read(void __iomem *io, int reg) -{ - return readl(io + reg); -} - -/* - * This function is used to read data from Master GW Data Descriptor. - * Data bytes in the Master GW Data Descriptor are shifted left so the - * data starts at the MSB of the descriptor registers as set by the - * underlying hardware. TYU_READ_DATA enables byte swapping while - * reading data bytes, and MUST be called by the SMBus read routines - * to copy data from the 32 * 32-bit HW Data registers a.k.a Master GW - * Data Descriptor. - */ -static u32 mlxbf_i2c_read_data(void __iomem *io, int reg) -{ - return (u32)be32_to_cpu(mlxbf_i2c_read(io, reg)); -} - -/* - * This function is used to write data to the Master GW Data Descriptor. - * Data copied to the Master GW Data Descriptor MUST be shifted left so - * the data starts at the MSB of the descriptor registers as required by - * the underlying hardware. TYU_WRITE_DATA enables byte swapping when - * writing data bytes, and MUST be called by the SMBus write routines to - * copy data to the 32 * 32-bit HW Data registers a.k.a Master GW Data - * Descriptor. - */ -static void mlxbf_i2c_write_data(void __iomem *io, int reg, u32 val) -{ - mlxbf_i2c_write(io, reg, (u32)cpu_to_be32(val)); -} - /* * Function to poll a set of bits at a specific address; it checks whether * the bits are equal to zero when eq_zero is set to 'true', and not equal @@ -541,7 +501,7 @@ static u32 mlxbf_smbus_poll(void __iomem *io, u32 addr, u32 mask, timeout = (timeout / MLXBF_I2C_POLL_FREQ_IN_USEC) + 1; do { - bits = mlxbf_i2c_read(io, addr) & mask; + bits = readl(io + addr) & mask; if (eq_zero ? bits == 0 : bits != 0) return eq_zero ? 1 : bits; udelay(MLXBF_I2C_POLL_FREQ_IN_USEC); @@ -609,16 +569,16 @@ static int mlxbf_i2c_smbus_check_status(struct mlxbf_i2c_priv *priv) MLXBF_I2C_SMBUS_TIMEOUT); /* Read cause status bits. */ - cause_status_bits = mlxbf_i2c_read(priv->mst_cause->io, - MLXBF_I2C_CAUSE_ARBITER); + cause_status_bits = readl(priv->mst_cause->io + + MLXBF_I2C_CAUSE_ARBITER); cause_status_bits &= MLXBF_I2C_CAUSE_MASTER_ARBITER_BITS_MASK; /* * Parse both Cause and Master GW bits, then return transaction status. */ - master_status_bits = mlxbf_i2c_read(priv->smbus->io, - MLXBF_I2C_SMBUS_MASTER_STATUS); + master_status_bits = readl(priv->smbus->io + + MLXBF_I2C_SMBUS_MASTER_STATUS); master_status_bits &= MLXBF_I2C_SMBUS_MASTER_STATUS_MASK; if (mlxbf_i2c_smbus_transaction_success(master_status_bits, @@ -649,10 +609,17 @@ static void mlxbf_i2c_smbus_write_data(struct mlxbf_i2c_priv *priv, aligned_length = round_up(length, 4); - /* Copy data bytes from 4-byte aligned source buffer. */ + /* + * Copy data bytes from 4-byte aligned source buffer. + * Data copied to the Master GW Data Descriptor MUST be shifted + * left so the data starts at the MSB of the descriptor registers + * as required by the underlying hardware. Enable byte swapping + * when writing data bytes to the 32 * 32-bit HW Data registers + * a.k.a Master GW Data Descriptor. + */ for (offset = 0; offset < aligned_length; offset += sizeof(u32)) { data32 = *((u32 *)(data + offset)); - mlxbf_i2c_write_data(priv->smbus->io, addr + offset, data32); + iowrite32be(data32, priv->smbus->io + addr + offset); } } @@ -664,15 +631,23 @@ static void mlxbf_i2c_smbus_read_data(struct mlxbf_i2c_priv *priv, mask = sizeof(u32) - 1; + /* + * Data bytes in the Master GW Data Descriptor are shifted left + * so the data starts at the MSB of the descriptor registers as + * set by the underlying hardware. Enable byte swapping while + * reading data bytes from the 32 * 32-bit HW Data registers + * a.k.a Master GW Data Descriptor. + */ + for (offset = 0; offset < (length & ~mask); offset += sizeof(u32)) { - data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + data32 = ioread32be(priv->smbus->io + addr + offset); *((u32 *)(data + offset)) = data32; } if (!(length & mask)) return; - data32 = mlxbf_i2c_read_data(priv->smbus->io, addr + offset); + data32 = ioread32be(priv->smbus->io + addr + offset); for (byte = 0; byte < (length & mask); byte++) { data[offset + byte] = data32 & GENMASK(7, 0); @@ -698,16 +673,16 @@ static int mlxbf_i2c_smbus_enable(struct mlxbf_i2c_priv *priv, u8 slave, command |= rol32(pec_en, MLXBF_I2C_MASTER_SEND_PEC_SHIFT); /* Clear status bits. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_STATUS, 0x0); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_STATUS); /* Set the cause data. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + writel(~0x0, priv->smbus->io + MLXBF_I2C_CAUSE_OR_CLEAR); /* Zero PEC byte. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_PEC, 0x0); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_PEC); /* Zero byte count. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_RS_BYTES, 0x0); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_RS_BYTES); /* GW activation. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_GW, command); + writel(command, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_GW); /* * Poll master status and check status bits. An ACK is sent when @@ -823,8 +798,8 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, * needs to be 'manually' reset. This should be removed in * next tag integration. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_MASTER_FSM, - MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK); + writel(MLXBF_I2C_SMBUS_MASTER_FSM_PS_STATE_MASK, + priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_FSM); } return ret; @@ -1113,8 +1088,8 @@ static void mlxbf_i2c_set_timings(struct mlxbf_i2c_priv *priv, timer |= mlxbf_i2c_set_timer(priv, timings->scl_low, false, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH, - timer); + writel(timer, priv->smbus->io + + MLXBF_I2C_SMBUS_TIMER_SCL_LOW_SCL_HIGH); timer = mlxbf_i2c_set_timer(priv, timings->sda_rise, false, MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_0); @@ -1124,37 +1099,34 @@ static void mlxbf_i2c_set_timings(struct mlxbf_i2c_priv *priv, MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_16); timer |= mlxbf_i2c_set_timer(priv, timings->scl_fall, false, MLXBF_I2C_MASK_8, MLXBF_I2C_SHIFT_24); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE, - timer); + writel(timer, priv->smbus->io + + MLXBF_I2C_SMBUS_TIMER_FALL_RISE_SPIKE); timer = mlxbf_i2c_set_timer(priv, timings->hold_start, true, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); timer |= mlxbf_i2c_set_timer(priv, timings->hold_data, true, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_THOLD, timer); + writel(timer, priv->smbus->io + MLXBF_I2C_SMBUS_TIMER_THOLD); timer = mlxbf_i2c_set_timer(priv, timings->setup_start, true, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); timer |= mlxbf_i2c_set_timer(priv, timings->setup_stop, true, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); - mlxbf_i2c_write(priv->smbus->io, - MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP, timer); + writel(timer, priv->smbus->io + + MLXBF_I2C_SMBUS_TIMER_TSETUP_START_STOP); timer = mlxbf_i2c_set_timer(priv, timings->setup_data, true, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA, - timer); + writel(timer, priv->smbus->io + MLXBF_I2C_SMBUS_TIMER_TSETUP_DATA); timer = mlxbf_i2c_set_timer(priv, timings->buf, false, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_0); timer |= mlxbf_i2c_set_timer(priv, timings->thigh_max, false, MLXBF_I2C_MASK_16, MLXBF_I2C_SHIFT_16); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_THIGH_MAX_TBUF, - timer); + writel(timer, priv->smbus->io + MLXBF_I2C_SMBUS_THIGH_MAX_TBUF); timer = timings->timeout; - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT, - timer); + writel(timer, priv->smbus->io + MLXBF_I2C_SMBUS_SCL_LOW_TIMEOUT); } enum mlxbf_i2c_timings_config { @@ -1426,19 +1398,15 @@ static int mlxbf_i2c_init_master(struct platform_device *pdev, * platform firmware; disabling the bus might compromise the system * functionality. */ - config_reg = mlxbf_i2c_read(gpio_res->io, - MLXBF_I2C_GPIO_0_FUNC_EN_0); + config_reg = readl(gpio_res->io + MLXBF_I2C_GPIO_0_FUNC_EN_0); config_reg = MLXBF_I2C_GPIO_SMBUS_GW_ASSERT_PINS(priv->bus, config_reg); - mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FUNC_EN_0, - config_reg); + writel(config_reg, gpio_res->io + MLXBF_I2C_GPIO_0_FUNC_EN_0); - config_reg = mlxbf_i2c_read(gpio_res->io, - MLXBF_I2C_GPIO_0_FORCE_OE_EN); + config_reg = readl(gpio_res->io + MLXBF_I2C_GPIO_0_FORCE_OE_EN); config_reg = MLXBF_I2C_GPIO_SMBUS_GW_RESET_PINS(priv->bus, config_reg); - mlxbf_i2c_write(gpio_res->io, MLXBF_I2C_GPIO_0_FORCE_OE_EN, - config_reg); + writel(config_reg, gpio_res->io + MLXBF_I2C_GPIO_0_FORCE_OE_EN); mutex_unlock(gpio_res->lock); @@ -1452,10 +1420,9 @@ static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) u32 corepll_val; u16 core_f; - pad_frequency = MLXBF_I2C_TYU_PLL_IN_FREQ; + pad_frequency = MLXBF_I2C_PLL_IN_FREQ; - corepll_val = mlxbf_i2c_read(corepll_res->io, - MLXBF_I2C_CORE_PLL_REG1); + corepll_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1); /* Get Core PLL configuration bits. */ core_f = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT) & @@ -1488,12 +1455,10 @@ static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) u8 core_od, core_r; u32 core_f; - pad_frequency = MLXBF_I2C_YU_PLL_IN_FREQ; + pad_frequency = MLXBF_I2C_PLL_IN_FREQ; - corepll_reg1_val = mlxbf_i2c_read(corepll_res->io, - MLXBF_I2C_CORE_PLL_REG1); - corepll_reg2_val = mlxbf_i2c_read(corepll_res->io, - MLXBF_I2C_CORE_PLL_REG2); + corepll_reg1_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1); + corepll_reg2_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG2); /* Get Core PLL configuration bits */ core_f = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT) & @@ -1585,7 +1550,7 @@ static int mlxbf_slave_enable(struct mlxbf_i2c_priv *priv, u8 addr) * (7-bit address, 1 status bit (1 if enabled, 0 if not)). */ for (reg = 0; reg < reg_cnt; reg++) { - slave_reg = mlxbf_i2c_read(priv->smbus->io, + slave_reg = readl(priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); /* * Each register holds 4 slave addresses. So, we have to keep @@ -1643,8 +1608,8 @@ static int mlxbf_slave_enable(struct mlxbf_i2c_priv *priv, u8 addr) /* Enable the slave address and update the register. */ slave_reg |= (1 << MLXBF_I2C_SMBUS_SLAVE_ADDR_EN_BIT) << (byte * 8); - mlxbf_i2c_write(priv->smbus->io, - MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg); + writel(slave_reg, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + + reg * 0x4); return 0; } @@ -1668,7 +1633,7 @@ static int mlxbf_slave_disable(struct mlxbf_i2c_priv *priv) * (7-bit address, 1 status bit (1 if enabled, 0 if not)). */ for (reg = 0; reg < reg_cnt; reg++) { - slave_reg = mlxbf_i2c_read(priv->smbus->io, + slave_reg = readl(priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4); /* Check whether the address slots are empty. */ @@ -1708,8 +1673,8 @@ static int mlxbf_slave_disable(struct mlxbf_i2c_priv *priv) /* Cleanup the slave address slot. */ slave_reg &= ~(GENMASK(7, 0) << (slave_byte * 8)); - mlxbf_i2c_write(priv->smbus->io, - MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + reg * 0x4, slave_reg); + writel(slave_reg, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_ADDR_CFG + + reg * 0x4); return 0; } @@ -1801,7 +1766,7 @@ static int mlxbf_i2c_init_slave(struct platform_device *pdev, int ret; /* Reset FSM. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_FSM, 0); + writel(0, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_FSM); /* * Enable slave cause interrupt bits. Drive @@ -1810,15 +1775,13 @@ static int mlxbf_i2c_init_slave(struct platform_device *pdev, * masters issue a Read and Write, respectively. But, clear all * interrupts first. */ - mlxbf_i2c_write(priv->slv_cause->io, - MLXBF_I2C_CAUSE_OR_CLEAR, ~0); + writel(~0, priv->slv_cause->io + MLXBF_I2C_CAUSE_OR_CLEAR); int_reg = MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE; int_reg |= MLXBF_I2C_CAUSE_WRITE_SUCCESS; - mlxbf_i2c_write(priv->slv_cause->io, - MLXBF_I2C_CAUSE_OR_EVTEN0, int_reg); + writel(int_reg, priv->slv_cause->io + MLXBF_I2C_CAUSE_OR_EVTEN0); /* Finally, set the 'ready' bit to start handling transactions. */ - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + writel(0x1, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_READY); /* Initialize the cause coalesce resource. */ ret = mlxbf_i2c_init_coalesce(pdev, priv); @@ -1844,23 +1807,21 @@ static bool mlxbf_i2c_has_coalesce(struct mlxbf_i2c_priv *priv, bool *read, MLXBF_I2C_CAUSE_YU_SLAVE_BIT : priv->bus + MLXBF_I2C_CAUSE_TYU_SLAVE_BIT; - coalesce0_reg = mlxbf_i2c_read(priv->coalesce->io, - MLXBF_I2C_CAUSE_COALESCE_0); + coalesce0_reg = readl(priv->coalesce->io + MLXBF_I2C_CAUSE_COALESCE_0); is_set = coalesce0_reg & (1 << slave_shift); if (!is_set) return false; /* Check the source of the interrupt, i.e. whether a Read or Write. */ - cause_reg = mlxbf_i2c_read(priv->slv_cause->io, - MLXBF_I2C_CAUSE_ARBITER); + cause_reg = readl(priv->slv_cause->io + MLXBF_I2C_CAUSE_ARBITER); if (cause_reg & MLXBF_I2C_CAUSE_READ_WAIT_FW_RESPONSE) *read = true; else if (cause_reg & MLXBF_I2C_CAUSE_WRITE_SUCCESS) *write = true; /* Clear cause bits. */ - mlxbf_i2c_write(priv->slv_cause->io, MLXBF_I2C_CAUSE_OR_CLEAR, ~0x0); + writel(~0x0, priv->slv_cause->io + MLXBF_I2C_CAUSE_OR_CLEAR); return true; } @@ -1900,8 +1861,8 @@ static int mlxbf_smbus_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes) * address, if supplied. */ if (recv_bytes > 0) { - data32 = mlxbf_i2c_read_data(priv->smbus->io, - MLXBF_I2C_SLAVE_DATA_DESC_ADDR); + data32 = ioread32be(priv->smbus->io + + MLXBF_I2C_SLAVE_DATA_DESC_ADDR); /* Parse the received bytes. */ switch (recv_bytes) { @@ -1966,7 +1927,7 @@ static int mlxbf_smbus_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes) control32 |= rol32(write_size, MLXBF_I2C_SLAVE_WRITE_BYTES_SHIFT); control32 |= rol32(pec_en, MLXBF_I2C_SLAVE_SEND_PEC_SHIFT); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_GW, control32); + writel(control32, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_GW); /* * Wait until the transfer is completed; the driver will wait @@ -1975,10 +1936,9 @@ static int mlxbf_smbus_irq_send(struct mlxbf_i2c_priv *priv, u8 recv_bytes) mlxbf_smbus_slave_wait_for_idle(priv, MLXBF_I2C_SMBUS_TIMEOUT); /* Release the Slave GW. */ - mlxbf_i2c_write(priv->smbus->io, - MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_PEC); + writel(0x1, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_READY); return 0; } @@ -2023,10 +1983,9 @@ static int mlxbf_smbus_irq_recv(struct mlxbf_i2c_priv *priv, u8 recv_bytes) i2c_slave_event(slave, I2C_SLAVE_STOP, &value); /* Release the Slave GW. */ - mlxbf_i2c_write(priv->smbus->io, - MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES, 0x0); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_PEC, 0x0); - mlxbf_i2c_write(priv->smbus->io, MLXBF_I2C_SMBUS_SLAVE_READY, 0x1); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_PEC); + writel(0x1, priv->smbus->io + MLXBF_I2C_SMBUS_SLAVE_READY); return ret; } @@ -2061,8 +2020,8 @@ static irqreturn_t mlxbf_smbus_irq(int irq, void *ptr) * slave, if the higher 8 bits are sent then the slave expect N bytes * from the master. */ - rw_bytes_reg = mlxbf_i2c_read(priv->smbus->io, - MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); + rw_bytes_reg = readl(priv->smbus->io + + MLXBF_I2C_SMBUS_SLAVE_RS_MASTER_BYTES); recv_bytes = (rw_bytes_reg >> 8) & GENMASK(7, 0); /* @@ -2264,6 +2223,7 @@ static const struct of_device_id mlxbf_i2c_dt_ids[] = { MODULE_DEVICE_TABLE(of, mlxbf_i2c_dt_ids); +#ifdef CONFIG_ACPI static const struct acpi_device_id mlxbf_i2c_acpi_ids[] = { { "MLNXBF03", (kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_1] }, { "MLNXBF23", (kernel_ulong_t)&mlxbf_i2c_chip[MLXBF_I2C_CHIP_TYPE_2] }, @@ -2305,6 +2265,12 @@ static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) return ret; } +#else +static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) +{ + return -ENOENT; +} +#endif /* CONFIG_ACPI */ static int mlxbf_i2c_of_probe(struct device *dev, struct mlxbf_i2c_priv *priv) { @@ -2473,7 +2439,9 @@ static struct platform_driver mlxbf_i2c_driver = { .driver = { .name = "i2c-mlxbf", .of_match_table = mlxbf_i2c_dt_ids, +#ifdef CONFIG_ACPI .acpi_match_table = ACPI_PTR(mlxbf_i2c_acpi_ids), +#endif /* CONFIG_ACPI */ }, }; @@ -2502,5 +2470,5 @@ static void __exit mlxbf_i2c_exit(void) module_exit(mlxbf_i2c_exit); MODULE_DESCRIPTION("Mellanox BlueField I2C bus driver"); -MODULE_AUTHOR("Khalil Blaiech <[email protected]>"); +MODULE_AUTHOR("Khalil Blaiech <[email protected]>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 0cbdfbe605b5..33de99b7bc20 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -475,6 +475,10 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); /* Set ioconfig */ @@ -529,10 +533,6 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) mtk_i2c_writew(i2c, control_reg, OFFSET_CONTROL); mtk_i2c_writew(i2c, I2C_DELAY_LEN, OFFSET_DELAY_LEN); - - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); } static const struct i2c_spec_values *mtk_i2c_get_spec(unsigned int speed) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index cab725559999..bdd60770779a 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -129,6 +129,7 @@ struct sh_mobile_i2c_data { int sr; bool send_stop; bool stop_after_dma; + bool atomic_xfer; struct resource *res; struct dma_chan *dma_tx; @@ -330,13 +331,15 @@ static unsigned char i2c_op(struct sh_mobile_i2c_data *pd, enum sh_mobile_i2c_op ret = iic_rd(pd, ICDR); break; case OP_RX_STOP: /* enable DTE interrupt, issue stop */ - iic_wr(pd, ICIC, - ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); + if (!pd->atomic_xfer) + iic_wr(pd, ICIC, + ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); iic_wr(pd, ICCR, ICCR_ICE | ICCR_RACK); break; case OP_RX_STOP_DATA: /* enable DTE interrupt, read data, issue stop */ - iic_wr(pd, ICIC, - ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); + if (!pd->atomic_xfer) + iic_wr(pd, ICIC, + ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); ret = iic_rd(pd, ICDR); iic_wr(pd, ICCR, ICCR_ICE | ICCR_RACK); break; @@ -429,7 +432,8 @@ static irqreturn_t sh_mobile_i2c_isr(int irq, void *dev_id) if (wakeup) { pd->sr |= SW_DONE; - wake_up(&pd->wait); + if (!pd->atomic_xfer) + wake_up(&pd->wait); } /* defeat write posting to avoid spurious WAIT interrupts */ @@ -581,6 +585,9 @@ static void start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, pd->pos = -1; pd->sr = 0; + if (pd->atomic_xfer) + return; + pd->dma_buf = i2c_get_dma_safe_msg_buf(pd->msg, 8); if (pd->dma_buf) sh_mobile_i2c_xfer_dma(pd); @@ -637,15 +644,13 @@ static int poll_busy(struct sh_mobile_i2c_data *pd) return i ? 0 : -ETIMEDOUT; } -static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, - struct i2c_msg *msgs, - int num) +static int sh_mobile_xfer(struct sh_mobile_i2c_data *pd, + struct i2c_msg *msgs, int num) { - struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); struct i2c_msg *msg; int err = 0; int i; - long timeout; + long time_left; /* Wake up device and enable clock */ pm_runtime_get_sync(pd->dev); @@ -662,15 +667,35 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, if (do_start) i2c_op(pd, OP_START); - /* The interrupt handler takes care of the rest... */ - timeout = wait_event_timeout(pd->wait, - pd->sr & (ICSR_TACK | SW_DONE), - adapter->timeout); - - /* 'stop_after_dma' tells if DMA transfer was complete */ - i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, pd->stop_after_dma); + if (pd->atomic_xfer) { + unsigned long j = jiffies + pd->adap.timeout; + + time_left = time_before_eq(jiffies, j); + while (time_left && + !(pd->sr & (ICSR_TACK | SW_DONE))) { + unsigned char sr = iic_rd(pd, ICSR); + + if (sr & (ICSR_AL | ICSR_TACK | + ICSR_WAIT | ICSR_DTE)) { + sh_mobile_i2c_isr(0, pd); + udelay(150); + } else { + cpu_relax(); + } + time_left = time_before_eq(jiffies, j); + } + } else { + /* The interrupt handler takes care of the rest... */ + time_left = wait_event_timeout(pd->wait, + pd->sr & (ICSR_TACK | SW_DONE), + pd->adap.timeout); + + /* 'stop_after_dma' tells if DMA xfer was complete */ + i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, + pd->stop_after_dma); + } - if (!timeout) { + if (!time_left) { dev_err(pd->dev, "Transfer request timed out\n"); if (pd->dma_direction != DMA_NONE) sh_mobile_i2c_cleanup_dma(pd); @@ -696,14 +721,35 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, return err ?: num; } +static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, + struct i2c_msg *msgs, + int num) +{ + struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); + + pd->atomic_xfer = false; + return sh_mobile_xfer(pd, msgs, num); +} + +static int sh_mobile_i2c_xfer_atomic(struct i2c_adapter *adapter, + struct i2c_msg *msgs, + int num) +{ + struct sh_mobile_i2c_data *pd = i2c_get_adapdata(adapter); + + pd->atomic_xfer = true; + return sh_mobile_xfer(pd, msgs, num); +} + static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING; } static const struct i2c_algorithm sh_mobile_i2c_algorithm = { - .functionality = sh_mobile_i2c_func, - .master_xfer = sh_mobile_i2c_xfer, + .functionality = sh_mobile_i2c_func, + .master_xfer = sh_mobile_i2c_xfer, + .master_xfer_atomic = sh_mobile_i2c_xfer_atomic, }; static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = { diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index c6098eee0c7c..2aa79c32ee22 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -180,7 +180,6 @@ config IRQ_MIPS_CPU select GENERIC_IRQ_CHIP select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING select IRQ_DOMAIN - select IRQ_DOMAIN_HIERARCHY if GENERIC_IRQ_IPI select GENERIC_IRQ_EFFECTIVE_AFF_MASK config CLPS711X_IRQCHIP @@ -315,7 +314,6 @@ config KEYSTONE_IRQ config MIPS_GIC bool select GENERIC_IRQ_IPI - select IRQ_DOMAIN_HIERARCHY select MIPS_CM config INGENIC_IRQ @@ -591,6 +589,7 @@ config LOONGSON_PCH_MSI config MST_IRQ bool "MStar Interrupt Controller" + depends on ARCH_MEDIATEK || ARCH_MSTARV7 || COMPILE_TEST default ARCH_MEDIATEK select IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 97838eb705f9..cbc7c740e4dc 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -244,7 +244,7 @@ static int bcm2836_cpu_dying(unsigned int cpu) #define BITS_PER_MBOX 32 -static void bcm2836_arm_irqchip_smp_init(void) +static void __init bcm2836_arm_irqchip_smp_init(void) { struct irq_fwspec ipi_fwspec = { .fwnode = intc.domain->fwnode, diff --git a/drivers/irqchip/irq-mst-intc.c b/drivers/irqchip/irq-mst-intc.c index 4be077591898..143657b0cf28 100644 --- a/drivers/irqchip/irq-mst-intc.c +++ b/drivers/irqchip/irq-mst-intc.c @@ -154,8 +154,8 @@ static const struct irq_domain_ops mst_intc_domain_ops = { .free = irq_domain_free_irqs_common, }; -int __init -mst_intc_of_init(struct device_node *dn, struct device_node *parent) +static int __init mst_intc_of_init(struct device_node *dn, + struct device_node *parent) { struct irq_domain *domain, *domain_parent; struct mst_intc_chip_data *cd; diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 3819185bfd02..cb7f60b3b4a9 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -71,8 +71,7 @@ struct intc_irqpin_priv { }; struct intc_irqpin_config { - unsigned int irlm_bit; - unsigned needs_irlm:1; + int irlm_bit; /* -1 if non-existent */ }; static unsigned long intc_irqpin_read32(void __iomem *iomem) @@ -349,11 +348,10 @@ static const struct irq_domain_ops intc_irqpin_irq_domain_ops = { static const struct intc_irqpin_config intc_irqpin_irlm_r8a777x = { .irlm_bit = 23, /* ICR0.IRLM0 */ - .needs_irlm = 1, }; static const struct intc_irqpin_config intc_irqpin_rmobile = { - .needs_irlm = 0, + .irlm_bit = -1, }; static const struct of_device_id intc_irqpin_dt_ids[] = { @@ -470,7 +468,7 @@ static int intc_irqpin_probe(struct platform_device *pdev) } /* configure "individual IRQ mode" where needed */ - if (config && config->needs_irlm) { + if (config && config->irlm_bit >= 0) { if (io[INTC_IRQPIN_REG_IRLM]) intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_IRLM, config->irlm_bit, 1, 1); diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index eaa3e9fe54e9..6f432d2a5ceb 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -99,7 +99,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask, struct irq_data *d, int enable) { int cpu; - struct plic_priv *priv = irq_get_chip_data(d->irq); + struct plic_priv *priv = irq_data_get_irq_chip_data(d); writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID); for_each_cpu(cpu, mask) { @@ -115,7 +115,7 @@ static void plic_irq_unmask(struct irq_data *d) { struct cpumask amask; unsigned int cpu; - struct plic_priv *priv = irq_get_chip_data(d->irq); + struct plic_priv *priv = irq_data_get_irq_chip_data(d); cpumask_and(&amask, &priv->lmask, cpu_online_mask); cpu = cpumask_any_and(irq_data_get_affinity_mask(d), @@ -127,7 +127,7 @@ static void plic_irq_unmask(struct irq_data *d) static void plic_irq_mask(struct irq_data *d) { - struct plic_priv *priv = irq_get_chip_data(d->irq); + struct plic_priv *priv = irq_data_get_irq_chip_data(d); plic_irq_toggle(&priv->lmask, d, 0); } @@ -138,7 +138,7 @@ static int plic_set_affinity(struct irq_data *d, { unsigned int cpu; struct cpumask amask; - struct plic_priv *priv = irq_get_chip_data(d->irq); + struct plic_priv *priv = irq_data_get_irq_chip_data(d); cpumask_and(&amask, &priv->lmask, mask_val); @@ -151,7 +151,7 @@ static int plic_set_affinity(struct irq_data *d, return -EINVAL; plic_irq_toggle(&priv->lmask, d, 0); - plic_irq_toggle(cpumask_of(cpu), d, 1); + plic_irq_toggle(cpumask_of(cpu), d, !irqd_irq_masked(d)); irq_data_update_effective_affinity(d, cpumask_of(cpu)); diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 0c2c61db26b4..8662d7b7b262 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -195,6 +195,10 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = { { .exti = 25, .irq_parent = 107, .chip = &stm32_exti_h_chip_direct }, { .exti = 30, .irq_parent = 52, .chip = &stm32_exti_h_chip_direct }, { .exti = 47, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct }, + { .exti = 48, .irq_parent = 138, .chip = &stm32_exti_h_chip_direct }, + { .exti = 50, .irq_parent = 139, .chip = &stm32_exti_h_chip_direct }, + { .exti = 52, .irq_parent = 140, .chip = &stm32_exti_h_chip_direct }, + { .exti = 53, .irq_parent = 141, .chip = &stm32_exti_h_chip_direct }, { .exti = 54, .irq_parent = 135, .chip = &stm32_exti_h_chip_direct }, { .exti = 61, .irq_parent = 100, .chip = &stm32_exti_h_chip_direct }, { .exti = 65, .irq_parent = 144, .chip = &stm32_exti_h_chip }, diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index e0cceb81c648..b2ab8db439d9 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -85,6 +85,17 @@ struct ti_sci_inta_vint_desc { * @base: Base address of the memory mapped IO registers * @pdev: Pointer to platform device. * @ti_sci_id: TI-SCI device identifier + * @unmapped_cnt: Number of @unmapped_dev_ids entries + * @unmapped_dev_ids: Pointer to an array of TI-SCI device identifiers of + * unmapped event sources. + * Unmapped Events are not part of the Global Event Map and + * they are converted to Global event within INTA to be + * received by the same INTA to generate an interrupt. + * In case an interrupt request comes for a device which is + * generating Unmapped Event, we must use the INTA's TI-SCI + * device identifier in place of the source device + * identifier to let sysfw know where it has to program the + * Global Event number. */ struct ti_sci_inta_irq_domain { const struct ti_sci_handle *sci; @@ -96,11 +107,37 @@ struct ti_sci_inta_irq_domain { void __iomem *base; struct platform_device *pdev; u32 ti_sci_id; + + int unmapped_cnt; + u16 *unmapped_dev_ids; }; #define to_vint_desc(e, i) container_of(e, struct ti_sci_inta_vint_desc, \ events[i]) +static u16 ti_sci_inta_get_dev_id(struct ti_sci_inta_irq_domain *inta, u32 hwirq) +{ + u16 dev_id = HWIRQ_TO_DEVID(hwirq); + int i; + + if (inta->unmapped_cnt == 0) + return dev_id; + + /* + * For devices sending Unmapped Events we must use the INTA's TI-SCI + * device identifier number to be able to convert it to a Global Event + * and map it to an interrupt. + */ + for (i = 0; i < inta->unmapped_cnt; i++) { + if (dev_id == inta->unmapped_dev_ids[i]) { + dev_id = inta->ti_sci_id; + break; + } + } + + return dev_id; +} + /** * ti_sci_inta_irq_handler() - Chained IRQ handler for the vint irqs * @desc: Pointer to irq_desc corresponding to the irq @@ -251,7 +288,7 @@ static struct ti_sci_inta_event_desc *ti_sci_inta_alloc_event(struct ti_sci_inta u16 dev_id, dev_index; int err; - dev_id = HWIRQ_TO_DEVID(hwirq); + dev_id = ti_sci_inta_get_dev_id(inta, hwirq); dev_index = HWIRQ_TO_IRQID(hwirq); event_desc = &vint_desc->events[free_bit]; @@ -352,14 +389,15 @@ static void ti_sci_inta_free_irq(struct ti_sci_inta_event_desc *event_desc, { struct ti_sci_inta_vint_desc *vint_desc; struct ti_sci_inta_irq_domain *inta; + u16 dev_id; vint_desc = to_vint_desc(event_desc, event_desc->vint_bit); inta = vint_desc->domain->host_data; + dev_id = ti_sci_inta_get_dev_id(inta, hwirq); /* free event irq */ mutex_lock(&inta->vint_mutex); inta->sci->ops.rm_irq_ops.free_event_map(inta->sci, - HWIRQ_TO_DEVID(hwirq), - HWIRQ_TO_IRQID(hwirq), + dev_id, HWIRQ_TO_IRQID(hwirq), inta->ti_sci_id, vint_desc->vint_id, event_desc->global_event, @@ -574,6 +612,41 @@ static struct msi_domain_info ti_sci_inta_msi_domain_info = { .chip = &ti_sci_inta_msi_irq_chip, }; +static int ti_sci_inta_get_unmapped_sources(struct ti_sci_inta_irq_domain *inta) +{ + struct device *dev = &inta->pdev->dev; + struct device_node *node = dev_of_node(dev); + struct of_phandle_iterator it; + int count, err, ret, i; + + count = of_count_phandle_with_args(node, "ti,unmapped-event-sources", NULL); + if (count <= 0) + return 0; + + inta->unmapped_dev_ids = devm_kcalloc(dev, count, + sizeof(*inta->unmapped_dev_ids), + GFP_KERNEL); + if (!inta->unmapped_dev_ids) + return -ENOMEM; + + i = 0; + of_for_each_phandle(&it, err, node, "ti,unmapped-event-sources", NULL, 0) { + u32 dev_id; + + ret = of_property_read_u32(it.node, "ti,sci-dev-id", &dev_id); + if (ret) { + dev_err(dev, "ti,sci-dev-id read failure for %pOFf\n", it.node); + of_node_put(it.node); + return ret; + } + inta->unmapped_dev_ids[i++] = dev_id; + } + + inta->unmapped_cnt = count; + + return 0; +} + static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) { struct irq_domain *parent_domain, *domain, *msi_domain; @@ -629,6 +702,10 @@ static int ti_sci_inta_irq_domain_probe(struct platform_device *pdev) if (IS_ERR(inta->base)) return PTR_ERR(inta->base); + ret = ti_sci_inta_get_unmapped_sources(inta); + if (ret) + return ret; + domain = irq_domain_add_linear(dev_of_node(dev), ti_sci_get_num_resources(inta->vint), &ti_sci_inta_irq_domain_ops, inta); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 376096bfc54a..40ca71b29bb9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4582,8 +4582,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); - -void nvme_sync_queues(struct nvme_ctrl *ctrl) +void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; @@ -4591,7 +4590,12 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); +} +EXPORT_SYMBOL_GPL(nvme_sync_io_queues); +void nvme_sync_queues(struct nvme_ctrl *ctrl) +{ + nvme_sync_io_queues(ctrl); if (ctrl->admin_q) blk_sync_queue(ctrl->admin_q); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cc111136a981..bc330bf0d3bd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -602,6 +602,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); +void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index df8f3612107f..0578ff253c47 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -198,6 +198,7 @@ struct nvme_queue { u32 q_depth; u16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -455,11 +456,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) { + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -476,8 +490,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (write_sq) - nvme_write_sq_db(nvmeq); + nvme_write_sq_db(nvmeq, write_sq); spin_unlock(&nvmeq->sq_lock); } @@ -486,7 +499,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - nvme_write_sq_db(nvmeq); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); } @@ -1496,6 +1510,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 541b0cba6d80..65e3d0ef36e1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -122,7 +122,6 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; bool use_inline_data; u32 io_queues[HCTX_MAX_TYPES]; }; @@ -1010,8 +1009,8 @@ out_free_io_queues: static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); if (ctrl->ctrl.admin_tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, @@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); if (ctrl->ctrl.queue_count > 1) { nvme_start_freeze(&ctrl->ctrl); nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); if (ctrl->ctrl.tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, @@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); } - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -1976,16 +1973,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&ctrl->teardown_lock); nvme_rdma_stop_queue(queue); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&ctrl->teardown_lock); } static enum blk_eh_timer_return @@ -2320,7 +2313,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return ERR_PTR(-ENOMEM); ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d6a3e1487354..c0c33320fe65 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -124,7 +124,6 @@ struct nvme_tcp_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; struct work_struct err_work; struct delayed_work connect_work; struct nvme_tcp_request async_req; @@ -1886,8 +1885,8 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, @@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); if (ctrl->queue_count <= 1) - goto out; + return; blk_mq_quiesce_queue(ctrl->admin_q); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); if (ctrl->tagset) { blk_mq_tagset_busy_iter(ctrl->tagset, @@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); -out: - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) @@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static enum blk_eh_timer_return @@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, nvme_tcp_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = diff --git a/fs/io-wq.c b/fs/io-wq.c index 02894df7656d..b53c055bea6a 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -482,6 +482,10 @@ static void io_impersonate_work(struct io_worker *worker, current->files = work->identity->files; current->nsproxy = work->identity->nsproxy; task_unlock(current); + if (!work->identity->files) { + /* failed grabbing files, ensure work gets cancelled */ + work->flags |= IO_WQ_WORK_CANCEL; + } } if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs) current->fs = work->identity->fs; diff --git a/fs/io_uring.c b/fs/io_uring.c index a7429c977eb3..8018c7076b25 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -995,20 +995,33 @@ static void io_sq_thread_drop_mm(void) if (mm) { kthread_unuse_mm(mm); mmput(mm); + current->mm = NULL; } } static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) { - if (!current->mm) { - if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || - !ctx->sqo_task->mm || - !mmget_not_zero(ctx->sqo_task->mm))) - return -EFAULT; - kthread_use_mm(ctx->sqo_task->mm); + struct mm_struct *mm; + + if (current->mm) + return 0; + + /* Should never happen */ + if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL))) + return -EFAULT; + + task_lock(ctx->sqo_task); + mm = ctx->sqo_task->mm; + if (unlikely(!mm || !mmget_not_zero(mm))) + mm = NULL; + task_unlock(ctx->sqo_task); + + if (mm) { + kthread_use_mm(mm); + return 0; } - return 0; + return -EFAULT; } static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, @@ -1274,9 +1287,12 @@ static bool io_identity_cow(struct io_kiocb *req) /* add one for this request */ refcount_inc(&id->count); - /* drop old identity, assign new one. one ref for req, one for tctx */ - if (req->work.identity != tctx->identity && - refcount_sub_and_test(2, &req->work.identity->count)) + /* drop tctx and req identity references, if needed */ + if (tctx->identity != &tctx->__identity && + refcount_dec_and_test(&tctx->identity->count)) + kfree(tctx->identity); + if (req->work.identity != &tctx->__identity && + refcount_dec_and_test(&req->work.identity->count)) kfree(req->work.identity); req->work.identity = id; @@ -1577,14 +1593,29 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) } } -static inline bool io_match_files(struct io_kiocb *req, - struct files_struct *files) +static inline bool __io_match_files(struct io_kiocb *req, + struct files_struct *files) { + return ((req->flags & REQ_F_WORK_INITIALIZED) && + (req->work.flags & IO_WQ_WORK_FILES)) && + req->work.identity->files == files; +} + +static bool io_match_files(struct io_kiocb *req, + struct files_struct *files) +{ + struct io_kiocb *link; + if (!files) return true; - if ((req->flags & REQ_F_WORK_INITIALIZED) && - (req->work.flags & IO_WQ_WORK_FILES)) - return req->work.identity->files == files; + if (__io_match_files(req, files)) + return true; + if (req->flags & REQ_F_LINK_HEAD) { + list_for_each_entry(link, &req->link_list, link_list) { + if (__io_match_files(link, files)) + return true; + } + } return false; } @@ -1668,7 +1699,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) WRITE_ONCE(cqe->user_data, req->user_data); WRITE_ONCE(cqe->res, res); WRITE_ONCE(cqe->flags, cflags); - } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) { + } else if (ctx->cq_overflow_flushed || + atomic_read(&req->task->io_uring->in_idle)) { /* * If we're in ring overflow flush mode, or in task cancel mode, * then we cannot store the request for later flushing, we need @@ -1838,7 +1870,7 @@ static void __io_free_req(struct io_kiocb *req) io_dismantle_req(req); percpu_counter_dec(&tctx->inflight); - if (tctx->in_idle) + if (atomic_read(&tctx->in_idle)) wake_up(&tctx->wait); put_task_struct(req->task); @@ -7695,7 +7727,8 @@ static int io_uring_alloc_task_context(struct task_struct *task) xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); tctx->last = NULL; - tctx->in_idle = 0; + atomic_set(&tctx->in_idle, 0); + tctx->sqpoll = false; io_init_identity(&tctx->__identity); tctx->identity = &tctx->__identity; task->io_uring = tctx; @@ -8388,22 +8421,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) return false; } -static bool io_match_link_files(struct io_kiocb *req, - struct files_struct *files) -{ - struct io_kiocb *link; - - if (io_match_files(req, files)) - return true; - if (req->flags & REQ_F_LINK_HEAD) { - list_for_each_entry(link, &req->link_list, link_list) { - if (io_match_files(link, files)) - return true; - } - } - return false; -} - /* * We're looking to cancel 'req' because it's holding on to our files, but * 'req' could be a link to another request. See if it is, and cancel that @@ -8453,7 +8470,21 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx, static bool io_cancel_link_cb(struct io_wq_work *work, void *data) { - return io_match_link(container_of(work, struct io_kiocb, work), data); + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + bool ret; + + if (req->flags & REQ_F_LINK_TIMEOUT) { + unsigned long flags; + struct io_ring_ctx *ctx = req->ctx; + + /* protect against races with linked timeouts */ + spin_lock_irqsave(&ctx->completion_lock, flags); + ret = io_match_link(req, data); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else { + ret = io_match_link(req, data); + } + return ret; } static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) @@ -8479,6 +8510,7 @@ static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) } static void io_cancel_defer_files(struct io_ring_ctx *ctx, + struct task_struct *task, struct files_struct *files) { struct io_defer_entry *de = NULL; @@ -8486,7 +8518,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->completion_lock); list_for_each_entry_reverse(de, &ctx->defer_list, list) { - if (io_match_link_files(de->req, files)) { + if (io_task_match(de->req, task) && + io_match_files(de->req, files)) { list_cut_position(&list, &ctx->defer_list, &de->list); break; } @@ -8512,7 +8545,6 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx, if (list_empty_careful(&ctx->inflight_list)) return false; - io_cancel_defer_files(ctx, files); /* cancel all at once, should be faster than doing it one by one*/ io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); @@ -8598,8 +8630,16 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, { struct task_struct *task = current; - if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { task = ctx->sq_data->thread; + atomic_inc(&task->io_uring->in_idle); + io_sq_thread_park(ctx->sq_data); + } + + if (files) + io_cancel_defer_files(ctx, NULL, files); + else + io_cancel_defer_files(ctx, task, NULL); io_cqring_overflow_flush(ctx, true, task, files); @@ -8607,12 +8647,23 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_run_task_work(); cond_resched(); } + + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { + atomic_dec(&task->io_uring->in_idle); + /* + * If the files that are going away are the ones in the thread + * identity, clear them out. + */ + if (task->io_uring->identity->files == files) + task->io_uring->identity->files = NULL; + io_sq_thread_unpark(ctx->sq_data); + } } /* * Note that this task has used io_uring. We use it for cancelation purposes. */ -static int io_uring_add_task_file(struct file *file) +static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) { struct io_uring_task *tctx = current->io_uring; @@ -8634,6 +8685,14 @@ static int io_uring_add_task_file(struct file *file) tctx->last = file; } + /* + * This is race safe in that the task itself is doing this, hence it + * cannot be going through the exit/cancel paths at the same time. + * This cannot be modified while exit/cancel is running. + */ + if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL)) + tctx->sqpoll = true; + return 0; } @@ -8675,7 +8734,7 @@ void __io_uring_files_cancel(struct files_struct *files) unsigned long index; /* make sure overflow events are dropped */ - tctx->in_idle = true; + atomic_inc(&tctx->in_idle); xa_for_each(&tctx->xa, index, file) { struct io_ring_ctx *ctx = file->private_data; @@ -8684,6 +8743,35 @@ void __io_uring_files_cancel(struct files_struct *files) if (files) io_uring_del_task_file(file); } + + atomic_dec(&tctx->in_idle); +} + +static s64 tctx_inflight(struct io_uring_task *tctx) +{ + unsigned long index; + struct file *file; + s64 inflight; + + inflight = percpu_counter_sum(&tctx->inflight); + if (!tctx->sqpoll) + return inflight; + + /* + * If we have SQPOLL rings, then we need to iterate and find them, and + * add the pending count for those. + */ + xa_for_each(&tctx->xa, index, file) { + struct io_ring_ctx *ctx = file->private_data; + + if (ctx->flags & IORING_SETUP_SQPOLL) { + struct io_uring_task *__tctx = ctx->sqo_task->io_uring; + + inflight += percpu_counter_sum(&__tctx->inflight); + } + } + + return inflight; } /* @@ -8697,11 +8785,11 @@ void __io_uring_task_cancel(void) s64 inflight; /* make sure overflow events are dropped */ - tctx->in_idle = true; + atomic_inc(&tctx->in_idle); do { /* read completions before cancelations */ - inflight = percpu_counter_sum(&tctx->inflight); + inflight = tctx_inflight(tctx); if (!inflight) break; __io_uring_files_cancel(NULL); @@ -8712,13 +8800,13 @@ void __io_uring_task_cancel(void) * If we've seen completions, retry. This avoids a race where * a completion comes in before we did prepare_to_wait(). */ - if (inflight != percpu_counter_sum(&tctx->inflight)) + if (inflight != tctx_inflight(tctx)) continue; schedule(); } while (1); finish_wait(&tctx->wait, &wait); - tctx->in_idle = false; + atomic_dec(&tctx->in_idle); } static int io_uring_flush(struct file *file, void *data) @@ -8863,7 +8951,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, io_sqpoll_wait_sq(ctx); submitted = to_submit; } else if (to_submit) { - ret = io_uring_add_task_file(f.file); + ret = io_uring_add_task_file(ctx, f.file); if (unlikely(ret)) goto out; mutex_lock(&ctx->uring_lock); @@ -8900,7 +8988,8 @@ out_fput: #ifdef CONFIG_PROC_FS static int io_uring_show_cred(int id, void *p, void *data) { - const struct cred *cred = p; + struct io_identity *iod = p; + const struct cred *cred = iod->creds; struct seq_file *m = data; struct user_namespace *uns = seq_user_ns(m); struct group_info *gi; @@ -9092,7 +9181,7 @@ err_fd: #if defined(CONFIG_UNIX) ctx->ring_sock->file = file; #endif - if (unlikely(io_uring_add_task_file(file))) { + if (unlikely(io_uring_add_task_file(ctx, file))) { file = ERR_PTR(-ENOMEM); goto err_fd; } diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 868364cea3b7..35b2d845704d 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -30,7 +30,8 @@ struct io_uring_task { struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - bool in_idle; + atomic_t in_idle; + bool sqpoll; }; #if defined(CONFIG_IO_URING) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 2b8366693d5c..e9e2df3f3f9e 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -337,10 +337,10 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * already contains a warning when RCU is not watching, so no point * in having another one here. */ + lockdep_hardirqs_off(CALLER_ADDR0); instrumentation_begin(); rcu_irq_enter_check_tick(); - /* Use the combo lockdep/tracing function */ - trace_hardirqs_off(); + trace_hardirqs_off_finish(); instrumentation_end(); return ret; diff --git a/kernel/futex.c b/kernel/futex.c index f8614ef4ff31..ac328874f6e5 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2380,10 +2380,22 @@ retry: } /* - * Since we just failed the trylock; there must be an owner. + * The trylock just failed, so either there is an owner or + * there is a higher priority waiter than this one. */ newowner = rt_mutex_owner(&pi_state->pi_mutex); - BUG_ON(!newowner); + /* + * If the higher priority waiter has not yet taken over the + * rtmutex then newowner is NULL. We can't return here with + * that state because it's inconsistent vs. the user space + * state. So drop the locks and try again. It's a valid + * situation and not any different from the other retry + * conditions. + */ + if (unlikely(!newowner)) { + err = -EAGAIN; + goto handle_err; + } } else { WARN_ON_ONCE(argowner != current); if (oldowner == current) { diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 10a5aff4eecc..164a031cfdb6 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + select IRQ_DOMAIN_HIERARCHY # Generic MSI interrupt support config GENERIC_MSI_IRQ |