aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst7
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/kernel/entry-armv.S12
-rw-r--r--arch/parisc/mm/init.c16
-rw-r--r--arch/x86/coco/tdx/tdx.c1
-rw-r--r--arch/x86/events/intel/core.c23
-rw-r--r--arch/x86/include/asm/fpu/types.h7
-rw-r--r--arch/x86/include/asm/page_64.h1
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/resctrl.h6
-rw-r--r--arch/x86/kernel/apic/apic.c11
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c8
-rw-r--r--arch/x86/kernel/fpu/xstate.c3
-rw-r--r--arch/x86/kernel/fpu/xstate.h4
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/kaslr.c32
-rw-r--r--drivers/ata/libata-core.c4
-rw-r--r--drivers/irqchip/irq-gic-v2m.c6
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c16
-rw-r--r--drivers/irqchip/irq-gic-v3.c21
-rw-r--r--drivers/irqchip/irq-msi-lib.c5
-rw-r--r--drivers/irqchip/irq-riscv-aplic-main.c4
-rw-r--r--drivers/irqchip/irq-sifive-plic.c115
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/direct-io.c16
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/zoned.c30
-rw-r--r--fs/fuse/dev.c14
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c7
-rw-r--r--fs/fuse/xattr.c4
-rw-r--r--fs/netfs/fscache_main.c1
-rw-r--r--fs/nilfs2/recovery.c35
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/nilfs2/sysfs.c43
-rw-r--r--fs/smb/server/smb2pdu.c4
-rw-r--r--fs/smb/server/transport_tcp.c4
-rw-r--r--fs/smb/server/xattr.h2
-rw-r--r--include/linux/mm.h4
-rw-r--r--include/linux/resctrl.h1
-rw-r--r--kernel/kexec_file.c2
-rw-r--r--kernel/locking/rtmutex.c9
-rw-r--r--kernel/resource.c6
-rw-r--r--lib/codetag.c17
-rw-r--r--lib/maple_tree.c7
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/memcontrol.c12
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/page_alloc.c7
-rw-r--r--mm/slub.c4
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/userfaultfd.c29
-rw-r--r--mm/vmalloc.c7
-rw-r--r--mm/vmscan.c24
-rwxr-xr-xscripts/gfp-translate66
-rw-r--r--tools/testing/selftests/mm/mseal_test.c37
-rw-r--r--tools/testing/selftests/mm/seal_elf.c13
61 files changed, 499 insertions, 264 deletions
diff --git a/.mailmap b/.mailmap
index caf46a652f15..a7969e2d590a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -269,6 +269,7 @@ James Ketrenos <jketreno@io.(none)>
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 86311c2907cd..95c18bc17083 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1717,9 +1717,10 @@ The following nested keys are defined.
entries fault back in or are written out to disk.
memory.zswap.writeback
- A read-write single value file. The default value is "1". The
- initial value of the root cgroup is 1, and when a new cgroup is
- created, it inherits the current value of its parent.
+ A read-write single value file. The default value is "1".
+ Note that this setting is hierarchical, i.e. the writeback would be
+ implicitly disabled for child cgroups if the upper hierarchy
+ does so.
When this is set to 0, all swapping attempts to swapping devices
are disabled. This included both zswap writebacks, and swapping due
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 54b2bb817a7f..173159e93c99 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -117,7 +117,7 @@ config ARM
select HAVE_KERNEL_XZ
select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
select HAVE_KRETPROBES if HAVE_KPROBES
- select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD)
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPTPROBES if !THUMB2_KERNEL
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index f01d23a220e6..1dfae1af8e31 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -29,6 +29,12 @@
#include "entry-header.S"
#include <asm/probes.h>
+#ifdef CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+#define RELOC_TEXT_NONE .reloc .text, R_ARM_NONE, .
+#else
+#define RELOC_TEXT_NONE
+#endif
+
/*
* Interrupt handling.
*/
@@ -1065,7 +1071,7 @@ vector_addrexcptn:
.globl vector_fiq
.section .vectors, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi )
@@ -1079,7 +1085,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi )
#ifdef CONFIG_HARDEN_BRANCH_HISTORY
.section .vectors.bhb.loop8, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_bhb_loop8_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi )
@@ -1092,7 +1098,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi )
W(b) vector_bhb_loop8_fiq
.section .vectors.bhb.bpiall, "ax", %progbits
- .reloc .text, R_ARM_NONE, .
+ RELOC_TEXT_NONE
W(b) vector_rst
W(b) vector_bhb_bpiall_und
ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi )
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 34d91cb8b259..96970fa75e4a 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -459,7 +459,6 @@ void free_initmem(void)
unsigned long kernel_end = (unsigned long)&_end;
/* Remap kernel text and data, but do not touch init section yet. */
- kernel_set_to_readonly = true;
map_pages(init_end, __pa(init_end), kernel_end - init_end,
PAGE_KERNEL, 0);
@@ -493,11 +492,18 @@ void free_initmem(void)
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
- /* rodata memory was already mapped with KERNEL_RO access rights by
- pagetable_init() and map_pages(). No need to do additional stuff here */
- unsigned long roai_size = __end_ro_after_init - __start_ro_after_init;
+ unsigned long start = (unsigned long) &__start_rodata;
+ unsigned long end = (unsigned long) &__end_rodata;
+
+ pr_info("Write protecting the kernel read-only data: %luk\n",
+ (end - start) >> 10);
+
+ kernel_set_to_readonly = true;
+ map_pages(start, __pa(start), end - start, PAGE_KERNEL, 0);
- pr_info("Write protected read-only-after-init data: %luk\n", roai_size >> 10);
+ /* force the kernel to see the new page table entries */
+ flush_cache_all();
+ flush_tlb_all();
}
#endif
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 078e2bac2553..da8b66dce0da 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -389,7 +389,6 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val)
.r12 = size,
.r13 = EPT_READ,
.r14 = addr,
- .r15 = *val,
};
if (__tdx_hypercall(&args))
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0c9c2706d4ec..9e519d8a810a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4589,6 +4589,25 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
return HYBRID_INTEL_CORE;
}
+static inline bool erratum_hsw11(struct perf_event *event)
+{
+ return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01);
+}
+
+/*
+ * The HSW11 requires a period larger than 100 which is the same as the BDM11.
+ * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
+ *
+ * The message 'interrupt took too long' can be observed on any counter which
+ * was armed with a period < 32 and two events expired in the same NMI.
+ * A minimum period of 32 is enforced for the rest of the events.
+ */
+static void hsw_limit_period(struct perf_event *event, s64 *left)
+{
+ *left = max(*left, erratum_hsw11(event) ? 128 : 32);
+}
+
/*
* Broadwell:
*
@@ -4606,8 +4625,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
*/
static void bdw_limit_period(struct perf_event *event, s64 *left)
{
- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
- X86_CONFIG(.event=0xc0, .umask=0x01)) {
+ if (erratum_hsw11(event)) {
if (*left < 128)
*left = 128;
*left &= ~0x3fULL;
@@ -6766,6 +6784,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.limit_period = hsw_limit_period;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index eb17f31b06d2..de16862bf230 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -591,6 +591,13 @@ struct fpu_state_config {
* even without XSAVE support, i.e. legacy features FP + SSE
*/
u64 legacy_features;
+ /*
+ * @independent_features:
+ *
+ * Features that are supported by XSAVES, but not managed as part of
+ * the FPU core, such as LBR
+ */
+ u64 independent_features;
};
/* FPU state configuration information */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index af4302d79b59..f3d257c45225 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -17,6 +17,7 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
+extern unsigned long physmem_end;
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 9053dfe9fa03..a98e53491a4e 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_MEMORY
+# define PHYSMEM_END physmem_end
+#endif
+
/*
* End of the region for which vmalloc page tables are pre-allocated.
* For non-KMSAN builds, this is the same as VMALLOC_END.
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd2588ca7..8b1b6ce1e51b 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -156,12 +156,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
__resctrl_sched_in(tsk);
}
-static inline u32 resctrl_arch_system_num_rmid_idx(void)
-{
- /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
- return boot_cpu_data.x86_cache_max_rmid + 1;
-}
-
static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
*rmid = idx;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 66fd4b2a37a3..373638691cd4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1775,12 +1775,9 @@ static __init void apic_set_fixmap(bool read_apic);
static __init void x2apic_disable(void)
{
- u32 x2apic_id, state = x2apic_state;
+ u32 x2apic_id;
- x2apic_mode = 0;
- x2apic_state = X2APIC_DISABLED;
-
- if (state != X2APIC_ON)
+ if (x2apic_state < X2APIC_ON)
return;
x2apic_id = read_apic_id();
@@ -1793,6 +1790,10 @@ static __init void x2apic_disable(void)
}
__x2apic_disable();
+
+ x2apic_mode = 0;
+ x2apic_state = X2APIC_DISABLED;
+
/*
* Don't reread the APIC ID as it was already done from
* check_x2apic() and the APIC driver still is a x2APIC variant,
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1930fce9dfe9..8591d53c144b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -119,6 +119,14 @@ struct rdt_hw_resource rdt_resources_all[] = {
},
};
+u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+ /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
+ return r->num_rmid;
+}
+
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c5a026fee5e0..1339f8328db5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
+ XFEATURE_MASK_INDEPENDENT;
+
/*
* Clear XSAVE features that are disabled in the normal CPUID.
*/
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 2ee0b9c53dcc..afb404cd2059 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -62,9 +62,9 @@ static inline u64 xfeatures_mask_supervisor(void)
static inline u64 xfeatures_mask_independent(void)
{
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
- return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
+ return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR;
- return XFEATURE_MASK_INDEPENDENT;
+ return fpu_kernel_cfg.independent_features;
}
/* XSAVE/XRSTOR wrapper functions */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d8dbeac8b206..ff253648706f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -958,8 +958,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
struct mhp_params *params)
{
+ unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
int ret;
+ if (WARN_ON_ONCE(end > PHYSMEM_END))
+ return -ERANGE;
+
ret = __add_pages(nid, start_pfn, nr_pages, params);
WARN_ON_ONCE(ret);
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 37db264866b6..230f1dee4f09 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -47,13 +47,24 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
*/
static __initdata struct kaslr_memory_region {
unsigned long *base;
+ unsigned long *end;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 0 },
- { &vmalloc_base, 0 },
- { &vmemmap_base, 0 },
+ {
+ .base = &page_offset_base,
+ .end = &physmem_end,
+ },
+ {
+ .base = &vmalloc_base,
+ },
+ {
+ .base = &vmemmap_base,
+ },
};
+/* The end of the possible address space for physical memory */
+unsigned long physmem_end __ro_after_init;
+
/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
{
@@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void)
BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+ /* Preset the end of the possible address space for physical memory */
+ physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
if (!kaslr_memory_enabled())
return;
@@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void)
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
+ /* Calculate the end of the region */
+ vaddr += get_padding(&kaslr_regions[i]);
/*
- * Jump the region and add a minimum padding based on
- * randomization alignment.
+ * KASLR trims the maximum possible size of the
+ * direct-map. Update the physmem_end boundary.
+ * No rounding required as the region starts
+ * PUD aligned and size is in units of TB.
*/
- vaddr += get_padding(&kaslr_regions[i]);
+ if (kaslr_regions[i].end)
+ *kaslr_regions[i].end = __pa_nodebug(vaddr - 1);
+
+ /* Add a minimum padding based on randomization alignment. */
vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c7752dc80028..30932552437a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5593,8 +5593,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int n_ports)
}
dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL);
- if (!dr)
+ if (!dr) {
+ kfree(host);
goto err_out;
+ }
devres_add(dev, dr);
dev_set_drvdata(dev, host);
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 51af63c046ed..be35c5349986 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -407,12 +407,12 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
&res, 0);
- if (ret) {
- of_node_put(child);
+ if (ret)
break;
- }
}
+ if (ret && child)
+ of_node_put(child);
if (!ret)
ret = gicv2m_allocate_domains(parent);
if (ret)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9b34596b3542..fdec478ba5e7 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1330,12 +1330,6 @@ static void its_send_vmovp(struct its_vpe *vpe)
}
/*
- * Protect against concurrent updates of the mapping state on
- * individual VMs.
- */
- guard(raw_spinlock_irqsave)(&vpe->its_vm->vmapp_lock);
-
- /*
* Yet another marvel of the architecture. If using the
* its_list "feature", we need to make sure that all ITSs
* receive all VMOVP commands in the same order. The only way
@@ -3824,7 +3818,14 @@ static int its_vpe_set_affinity(struct irq_data *d,
* protect us, and that we must ensure nobody samples vpe->col_idx
* during the update, hence the lock below which must also be
* taken on any vLPI handling path that evaluates vpe->col_idx.
+ *
+ * Finally, we must protect ourselves against concurrent updates of
+ * the mapping state on this VM should the ITS list be in use (see
+ * the shortcut in its_send_vmovp() otherewise).
*/
+ if (its_list_map)
+ raw_spin_lock(&vpe->its_vm->vmapp_lock);
+
from = vpe_to_cpuid_lock(vpe, &flags);
table_mask = gic_data_rdist_cpu(from)->vpe_table_mask;
@@ -3854,6 +3855,9 @@ out:
irq_data_update_effective_affinity(d, cpumask_of(cpu));
vpe_to_cpuid_unlock(vpe, flags);
+ if (its_list_map)
+ raw_spin_unlock(&vpe->its_vm->vmapp_lock);
+
return IRQ_SET_MASK_OK_DONE;
}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index c19083bfb943..74f21e03d4a3 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1154,14 +1154,8 @@ static void gic_update_rdist_properties(void)
gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : "");
}
-static void gic_cpu_sys_reg_init(void)
+static void gic_cpu_sys_reg_enable(void)
{
- int i, cpu = smp_processor_id();
- u64 mpidr = gic_cpu_to_affinity(cpu);
- u64 need_rss = MPIDR_RS(mpidr);
- bool group0;
- u32 pribits;
-
/*
* Need to check that the SRE bit has actually been set. If
* not, it means that SRE is disabled at EL2. We're going to
@@ -1172,6 +1166,16 @@ static void gic_cpu_sys_reg_init(void)
if (!gic_enable_sre())
pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
+}
+
+static void gic_cpu_sys_reg_init(void)
+{
+ int i, cpu = smp_processor_id();
+ u64 mpidr = gic_cpu_to_affinity(cpu);
+ u64 need_rss = MPIDR_RS(mpidr);
+ bool group0;
+ u32 pribits;
+
pribits = gic_get_pribits();
group0 = gic_has_group0();
@@ -1333,6 +1337,7 @@ static int gic_check_rdist(unsigned int cpu)
static int gic_starting_cpu(unsigned int cpu)
{
+ gic_cpu_sys_reg_enable();
gic_cpu_init();
if (gic_dist_supports_lpis())
@@ -1498,6 +1503,7 @@ static int gic_cpu_pm_notifier(struct notifier_block *self,
if (cmd == CPU_PM_EXIT) {
if (gic_dist_security_disabled())
gic_enable_redist(true);
+ gic_cpu_sys_reg_enable();
gic_cpu_sys_reg_init();
} else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) {
gic_write_grpen1(0);
@@ -2070,6 +2076,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_update_rdist_properties();
+ gic_cpu_sys_reg_enable();
gic_prio_init();
gic_dist_init();
gic_cpu_init();
diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c
index b5b90003311a..d8e29fc0d406 100644
--- a/drivers/irqchip/irq-msi-lib.c
+++ b/drivers/irqchip/irq-msi-lib.c
@@ -128,6 +128,9 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
const struct msi_parent_ops *ops = d->msi_parent_ops;
u32 busmask = BIT(bus_token);
+ if (!ops)
+ return 0;
+
if (fwspec->fwnode != d->fwnode || fwspec->param_count != 0)
return 0;
@@ -135,6 +138,6 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
if (bus_token == ops->bus_select_token)
return 1;
- return ops && !!(ops->bus_select_mask & busmask);
+ return !!(ops->bus_select_mask & busmask);
}
EXPORT_SYMBOL_GPL(msi_lib_irq_domain_select);
diff --git a/drivers/irqchip/irq-riscv-aplic-main.c b/drivers/irqchip/irq-riscv-aplic-main.c
index 28dd175b5764..981fad6fb8f7 100644
--- a/drivers/irqchip/irq-riscv-aplic-main.c
+++ b/drivers/irqchip/irq-riscv-aplic-main.c
@@ -175,9 +175,9 @@ static int aplic_probe(struct platform_device *pdev)
/* Map the MMIO registers */
regs = devm_platform_ioremap_resource(pdev, 0);
- if (!regs) {
+ if (IS_ERR(regs)) {
dev_err(dev, "failed map MMIO registers\n");
- return -ENOMEM;
+ return PTR_ERR(regs);
}
/*
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 9e22f7e378f5..4d9ea718086d 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -3,6 +3,7 @@
* Copyright (C) 2017 SiFive
* Copyright (C) 2018 Christoph Hellwig
*/
+#define pr_fmt(fmt) "riscv-plic: " fmt
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -63,7 +64,7 @@
#define PLIC_QUIRK_EDGE_INTERRUPT 0
struct plic_priv {
- struct device *dev;
+ struct fwnode_handle *fwnode;
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
@@ -378,8 +379,8 @@ static void plic_handle_irq(struct irq_desc *desc)
int err = generic_handle_domain_irq(handler->priv->irqdomain,
hwirq);
if (unlikely(err)) {
- dev_warn_ratelimited(handler->priv->dev,
- "can't find mapping for hwirq %lu\n", hwirq);
+ pr_warn_ratelimited("%pfwP: can't find mapping for hwirq %lu\n",
+ handler->priv->fwnode, hwirq);
}
}
@@ -408,7 +409,8 @@ static int plic_starting_cpu(unsigned int cpu)
enable_percpu_irq(plic_parent_irq,
irq_get_trigger_type(plic_parent_irq));
else
- dev_warn(handler->priv->dev, "cpu%d: parent irq not available\n", cpu);
+ pr_warn("%pfwP: cpu%d: parent irq not available\n",
+ handler->priv->fwnode, cpu);
plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD);
return 0;
@@ -424,38 +426,36 @@ static const struct of_device_id plic_match[] = {
{}
};
-static int plic_parse_nr_irqs_and_contexts(struct platform_device *pdev,
+static int plic_parse_nr_irqs_and_contexts(struct fwnode_handle *fwnode,
u32 *nr_irqs, u32 *nr_contexts)
{
- struct device *dev = &pdev->dev;
int rc;
/*
* Currently, only OF fwnode is supported so extend this
* function for ACPI support.
*/
- if (!is_of_node(dev->fwnode))
+ if (!is_of_node(fwnode))
return -EINVAL;
- rc = of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", nr_irqs);
+ rc = of_property_read_u32(to_of_node(fwnode), "riscv,ndev", nr_irqs);
if (rc) {
- dev_err(dev, "riscv,ndev property not available\n");
+ pr_err("%pfwP: riscv,ndev property not available\n", fwnode);
return rc;
}
- *nr_contexts = of_irq_count(to_of_node(dev->fwnode));
+ *nr_contexts = of_irq_count(to_of_node(fwnode));
if (WARN_ON(!(*nr_contexts))) {
- dev_err(dev, "no PLIC context available\n");
+ pr_err("%pfwP: no PLIC context available\n", fwnode);
return -EINVAL;
}
return 0;
}
-static int plic_parse_context_parent(struct platform_device *pdev, u32 context,
+static int plic_parse_context_parent(struct fwnode_handle *fwnode, u32 context,
u32 *parent_hwirq, int *parent_cpu)
{
- struct device *dev = &pdev->dev;
struct of_phandle_args parent;
unsigned long hartid;
int rc;
@@ -464,10 +464,10 @@ static int plic_parse_context_parent(struct platform_device *pdev, u32 context,
* Currently, only OF fwnode is supported so extend this
* function for ACPI support.
*/
- if (!is_of_node(dev->fwnode))
+ if (!is_of_node(fwnode))
return -EINVAL;
- rc = of_irq_parse_one(to_of_node(dev->fwnode), context, &parent);
+ rc = of_irq_parse_one(to_of_node(fwnode), context, &parent);
if (rc)
return rc;
@@ -480,48 +480,55 @@ static int plic_parse_context_parent(struct platform_device *pdev, u32 context,
return 0;
}
-static int plic_probe(struct platform_device *pdev)
+static int plic_probe(struct fwnode_handle *fwnode)
{
int error = 0, nr_contexts, nr_handlers = 0, cpu, i;
- struct device *dev = &pdev->dev;
unsigned long plic_quirks = 0;
struct plic_handler *handler;
u32 nr_irqs, parent_hwirq;
struct plic_priv *priv;
irq_hw_number_t hwirq;
+ void __iomem *regs;
- if (is_of_node(dev->fwnode)) {
+ if (is_of_node(fwnode)) {
const struct of_device_id *id;
- id = of_match_node(plic_match, to_of_node(dev->fwnode));
+ id = of_match_node(plic_match, to_of_node(fwnode));
if (id)
plic_quirks = (unsigned long)id->data;
+
+ regs = of_iomap(to_of_node(fwnode), 0);
+ if (!regs)
+ return -ENOMEM;
+ } else {
+ return -ENODEV;
}
- error = plic_parse_nr_irqs_and_contexts(pdev, &nr_irqs, &nr_contexts);
+ error = plic_parse_nr_irqs_and_contexts(fwnode, &nr_irqs, &nr_contexts);
if (error)
- return error;
+ goto fail_free_regs;
- priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ error = -ENOMEM;
+ goto fail_free_regs;
+ }
- priv->dev = dev;
+ priv->fwnode = fwnode;
priv->plic_quirks = plic_quirks;
priv->nr_irqs = nr_irqs;
+ priv->regs = regs;
- priv->regs = devm_platform_ioremap_resource(pdev, 0);
- if (WARN_ON(!priv->regs))
- return -EIO;
-
- priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL);
- if (!priv->prio_save)
- return -ENOMEM;
+ priv->prio_save = bitmap_zalloc(nr_irqs, GFP_KERNEL);
+ if (!priv->prio_save) {
+ error = -ENOMEM;
+ goto fail_free_priv;
+ }
for (i = 0; i < nr_contexts; i++) {
- error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu);
+ error = plic_parse_context_parent(fwnode, i, &parent_hwirq, &cpu);
if (error) {
- dev_warn(dev, "hwirq for context%d not found\n", i);
+ pr_warn("%pfwP: hwirq for context%d not found\n", fwnode, i);
continue;
}
@@ -543,7 +550,7 @@ static int plic_probe(struct platform_device *pdev)
}
if (cpu < 0) {
- dev_warn(dev, "Invalid cpuid for context %d\n", i);
+ pr_warn("%pfwP: Invalid cpuid for context %d\n", fwnode, i);
continue;
}
@@ -554,7 +561,7 @@ static int plic_probe(struct platform_device *pdev)
*/
handler = per_cpu_ptr(&plic_handlers, cpu);
if (handler->present) {
- dev_warn(dev, "handler already present for context %d.\n", i);
+ pr_warn("%pfwP: handler already present for context %d.\n", fwnode, i);
plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD);
goto done;
}
@@ -568,8 +575,8 @@ static int plic_probe(struct platform_device *pdev)
i * CONTEXT_ENABLE_SIZE;
handler->priv = priv;
- handler->enable_save = devm_kcalloc(dev, DIV_ROUND_UP(nr_irqs, 32),
- sizeof(*handler->enable_save), GFP_KERNEL);
+ handler->enable_save = kcalloc(DIV_ROUND_UP(nr_irqs, 32),
+ sizeof(*handler->enable_save), GFP_KERNEL);
if (!handler->enable_save)
goto fail_cleanup_contexts;
done:
@@ -581,7 +588,7 @@ done:
nr_handlers++;
}
- priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
+ priv->irqdomain = irq_domain_add_linear(to_of_node(fwnode), nr_irqs + 1,
&plic_irqdomain_ops, priv);
if (WARN_ON(!priv->irqdomain))
goto fail_cleanup_contexts;
@@ -619,13 +626,13 @@ done:
}
}
- dev_info(dev, "mapped %d interrupts with %d handlers for %d contexts.\n",
- nr_irqs, nr_handlers, nr_contexts);
+ pr_info("%pfwP: mapped %d interrupts with %d handlers for %d contexts.\n",
+ fwnode, nr_irqs, nr_handlers, nr_contexts);
return 0;
fail_cleanup_contexts:
for (i = 0; i < nr_contexts; i++) {
- if (plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu))
+ if (plic_parse_context_parent(fwnode, i, &parent_hwirq, &cpu))
continue;
if (parent_hwirq != RV_IRQ_EXT || cpu < 0)
continue;
@@ -634,17 +641,37 @@ fail_cleanup_contexts:
handler->present = false;
handler->hart_base = NULL;
handler->enable_base = NULL;
+ kfree(handler->enable_save);
handler->enable_save = NULL;
handler->priv = NULL;
}
- return -ENOMEM;
+ bitmap_free(priv->prio_save);
+fail_free_priv:
+ kfree(priv);
+fail_free_regs:
+ iounmap(regs);
+ return error;
+}
+
+static int plic_platform_probe(struct platform_device *pdev)
+{
+ return plic_probe(pdev->dev.fwnode);
}
static struct platform_driver plic_driver = {
.driver = {
.name = "riscv-plic",
.of_match_table = plic_match,
+ .suppress_bind_attrs = true,
},
- .probe = plic_probe,
+ .probe = plic_platform_probe,
};
builtin_platform_driver(plic_driver);
+
+static int __init plic_early_probe(struct device_node *node,
+ struct device_node *parent)
+{
+ return plic_probe(&node->fwnode);
+}
+
+IRQCHIP_DECLARE(riscv, "allwinner,sun20i-d1-plic", plic_early_probe);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 75fa563e4cac..c8568b1a61c4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -459,7 +459,6 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
- bool fsync_skip_inode_lock;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 67adbe9d294a..364bce34f034 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -864,13 +864,6 @@ again:
if (IS_ERR_OR_NULL(dio)) {
ret = PTR_ERR_OR_ZERO(dio);
} else {
- struct btrfs_file_private stack_private = { 0 };
- struct btrfs_file_private *private;
- const bool have_private = (file->private_data != NULL);
-
- if (!have_private)
- file->private_data = &stack_private;
-
/*
* If we have a synchronous write, we must make sure the fsync
* triggered by the iomap_dio_complete() call below doesn't
@@ -879,13 +872,10 @@ again:
* partial writes due to the input buffer (or parts of it) not
* being already faulted in.
*/
- private = file->private_data;
- private->fsync_skip_inode_lock = true;
+ ASSERT(current->journal_info == NULL);
+ current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB;
ret = iomap_dio_complete(dio);
- private->fsync_skip_inode_lock = false;
-
- if (!have_private)
- file->private_data = NULL;
+ current->journal_info = NULL;
}
/* No increment (+=) because iomap returns a cumulative value. */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9914419f3b7d..2aeb8116549c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
*/
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct btrfs_file_private *private = file->private_data;
struct dentry *dentry = file_dentry(file);
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_root *root = inode->root;
@@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
u64 len;
bool full_sync;
- const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
+ bool skip_ilock = false;
+
+ if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
+ skip_ilock = true;
+ current->journal_info = NULL;
+ lockdep_assert_held(&inode->vfs_inode.i_rwsem);
+ }
trace_btrfs_sync_file(file, datasync);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 7d6f5d9420ec..feb8f9f2f358 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4346,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
int ret;
if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
- extent_changeset_init(&changeset);
return clear_record_extent_bits(&inode->io_tree, start,
start + len - 1,
- EXTENT_QGROUP_RESERVED, &changeset);
+ EXTENT_QGROUP_RESERVED, NULL);
}
/* In release case, we shouldn't have @reserved */
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 98c03ddc760b..dd9ce9b9f69e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,6 +27,12 @@ struct btrfs_root_item;
struct btrfs_root;
struct btrfs_path;
+/*
+ * Signal that a direct IO write is in progress, to avoid deadlock for sync
+ * direct IO writes when fsync is called during the direct IO write path.
+ */
+#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1)
+
/* Radix-tree tag for roots that are part of the trasaction. */
#define BTRFS_ROOT_TRANS_TAG 0
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 66f63e82af79..047e3337852e 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
return -EINVAL;
}
+ bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
btrfs_err(bg->fs_info,
"zoned: cannot recover write pointer for zone %llu",
@@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
}
bg->alloc_offset = zone_info[0].alloc_offset;
- bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
return 0;
}
@@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
return -EINVAL;
}
+ /* In case a device is missing we have a cap of 0, so don't use it. */
+ bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
for (i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
- /* In case a device is missing we have a cap of 0, so don't use it. */
- bg->zone_capacity = min_not_zero(zone_info[0].capacity,
- zone_info[1].capacity);
}
if (zone_info[0].alloc_offset != WP_MISSING_DEV)
@@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
unsigned long *active = NULL;
u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
+ u64 profile;
if (!btrfs_is_zoned(fs_info))
return 0;
@@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
}
- switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+ switch (profile) {
case 0: /* single */
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
@@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
+ if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
+ profile != BTRFS_BLOCK_GROUP_RAID10) {
+ /*
+ * Detected broken write pointer. Make this block group
+ * unallocatable by setting the allocation pointer at the end of
+ * allocatable region. Relocating this block group will fix the
+ * mismatch.
+ *
+ * Currently, we cannot handle RAID0 or RAID10 case like this
+ * because we don't have a proper zone_capacity value. But,
+ * reading from this block group won't work anyway by a missing
+ * stripe.
+ */
+ cache->alloc_offset = cache->zone_capacity;
+ ret = 0;
+ }
+
out:
/* Reject non SINGLE data profiles without RST */
if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7146038b2fe7..f0c9cd1a0b39 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -31,6 +31,8 @@ MODULE_ALIAS("devname:fuse");
static struct kmem_cache *fuse_req_cachep;
+static void end_requests(struct list_head *head);
+
static struct fuse_dev *fuse_get_dev(struct file *file)
{
/*
@@ -773,7 +775,6 @@ static int fuse_check_folio(struct folio *folio)
(folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
- 1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
1 << PG_workingset |
@@ -818,9 +819,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
newfolio = page_folio(buf->page);
- if (!folio_test_uptodate(newfolio))
- folio_mark_uptodate(newfolio);
-
+ folio_clear_uptodate(newfolio);
folio_clear_mappedtodisk(newfolio);
if (fuse_check_folio(newfolio) != 0)
@@ -1822,6 +1821,13 @@ static void fuse_resend(struct fuse_conn *fc)
}
spin_lock(&fiq->lock);
+ if (!fiq->connected) {
+ spin_unlock(&fiq->lock);
+ list_for_each_entry(req, &to_queue, list)
+ clear_bit(FR_PENDING, &req->flags);
+ end_requests(&to_queue);
+ return;
+ }
/* iq and pq requests are both oldest to newest */
list_splice(&to_queue, &fiq->pending);
fiq->ops->wake_pending_and_unlock(fiq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2b0d4781f394..8e96df9fd76c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -670,7 +670,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
err = get_create_ext(&args, dir, entry, mode);
if (err)
- goto out_put_forget_req;
+ goto out_free_ff;
err = fuse_simple_request(fm, &args);
free_ext_value(&args);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f39456c65ed7..ed76121f73f2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1832,10 +1832,16 @@ __acquires(fi->lock)
fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock);
- /* After fuse_writepage_finish() aux request list is private */
+ /* After rb_erase() aux request list is private */
for (aux = wpa->next; aux; aux = next) {
+ struct backing_dev_info *bdi = inode_to_bdi(aux->inode);
+
next = aux->next;
aux->next = NULL;
+
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP);
+ wb_writeout_inc(&bdi->wb);
fuse_writepage_free(aux);
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8ab4e93916f..bebd89002328 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1332,11 +1332,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
* on a stacked fs (e.g. overlayfs) themselves and with
* max_stack_depth == 1, FUSE fs can be stacked as the
* underlying fs of a stacked fs (e.g. overlayfs).
+ *
+ * Also don't allow the combination of FUSE_PASSTHROUGH
+ * and FUSE_WRITEBACK_CACHE, current design doesn't handle
+ * them together.
*/
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
(flags & FUSE_PASSTHROUGH) &&
arg->max_stack_depth > 0 &&
- arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) {
+ arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
+ !(flags & FUSE_WRITEBACK_CACHE)) {
fc->passthrough = 1;
fc->max_stack_depth = arg->max_stack_depth;
fm->sb->s_stack_depth = arg->max_stack_depth;
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 5b423fdbb13f..9f568d345c51 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -81,7 +81,7 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
}
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
- ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
+ ret = min_t(size_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) {
fm->fc->no_getxattr = 1;
ret = -EOPNOTSUPP;
@@ -143,7 +143,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
}
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
- ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
+ ret = min_t(size_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..49849005eb7c 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -103,6 +103,7 @@ void __exit fscache_exit(void)
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
+ timer_shutdown_sync(&fscache_cookie_lru_timer);
destroy_workqueue(fscache_wq);
pr_notice("FS-Cache unloaded\n");
}
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index b638dc06df2f..61e25a980f73 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -716,6 +716,33 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
}
/**
+ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
+ * @nilfs: nilfs object
+ */
+static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
+{
+ struct nilfs_inode_info *ii, *n;
+ LIST_HEAD(head);
+
+ /* Abandon inodes that have read recovery data */
+ spin_lock(&nilfs->ns_inode_lock);
+ list_splice_init(&nilfs->ns_dirty_files, &head);
+ spin_unlock(&nilfs->ns_inode_lock);
+ if (list_empty(&head))
+ return;
+
+ set_nilfs_purging(nilfs);
+ list_for_each_entry_safe(ii, n, &head, i_dirty) {
+ spin_lock(&nilfs->ns_inode_lock);
+ list_del_init(&ii->i_dirty);
+ spin_unlock(&nilfs->ns_inode_lock);
+
+ iput(&ii->vfs_inode);
+ }
+ clear_nilfs_purging(nilfs);
+}
+
+/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
* @sb: super block instance
@@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
if (unlikely(err)) {
nilfs_err(sb, "error %d writing segment for recovery",
err);
- goto failed;
+ goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
- failed:
+put_root:
nilfs_put_root(root);
return err;
+
+failed:
+ nilfs_abort_roll_forward(nilfs);
+ goto put_root;
}
/**
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0ca3110d6386..871ec35ea8e8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1812,6 +1812,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
+ if (list_empty(&logs))
+ return; /* if the first segment buffer preparation failed */
+
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -2056,7 +2059,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
- goto out;
+ goto failed;
/* Update time stamp */
sci->sc_seg_ctime = ktime_get_real_seconds();
@@ -2120,10 +2123,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
return err;
failed_to_write:
- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
- nilfs_redirty_inodes(&sci->sc_dirty_files);
-
failed:
+ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
+ nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index a5569b7f47a3..14868a3dd592 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u32 major = le32_to_cpu(sbp[0]->s_rev_level);
- u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+ struct nilfs_super_block *raw_sb;
+ u32 major;
+ u16 minor;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ major = le32_to_cpu(raw_sb->s_rev_level);
+ minor = le16_to_cpu(raw_sb->s_minor_rev_level);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%d.%d\n", major, minor);
}
@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+ struct nilfs_super_block *raw_sb;
+ u64 dev_size;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ dev_size = le64_to_cpu(raw_sb->s_dev_size);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%llu\n", dev_size);
}
@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
- return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid);
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid);
+ up_read(&nilfs->ns_sem);
+
+ return len;
}
static
@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n",
+ raw_sb->s_volume_name);
+ up_read(&nilfs->ns_sem);
- return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
- sbp[0]->s_volume_name);
+ return len;
}
static const char dev_readme_str[] =
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 20846a4d3031..8bdc59251418 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1690,6 +1690,8 @@ int smb2_sess_setup(struct ksmbd_work *work)
rc = ksmbd_session_register(conn, sess);
if (rc)
goto out_err;
+
+ conn->binding = false;
} else if (conn->dialect >= SMB30_PROT_ID &&
(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
@@ -1768,6 +1770,8 @@ int smb2_sess_setup(struct ksmbd_work *work)
sess = NULL;
goto out_err;
}
+
+ conn->binding = false;
}
work->sess = sess;
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index a84788396daa..aaed9e293b2e 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -624,8 +624,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
for_each_netdev(&init_net, netdev) {
if (netif_is_bridge_port(netdev))
continue;
- if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL)))
+ if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) {
+ rtnl_unlock();
return -ENOMEM;
+ }
}
rtnl_unlock();
bind_additional_ifaces = 1;
diff --git a/fs/smb/server/xattr.h b/fs/smb/server/xattr.h
index 16499ca5c82d..fa3e27d6971b 100644
--- a/fs/smb/server/xattr.h
+++ b/fs/smb/server/xattr.h
@@ -76,7 +76,7 @@ struct xattr_acl_entry {
struct xattr_smb_acl {
int count;
int next;
- struct xattr_acl_entry entries[];
+ struct xattr_acl_entry entries[] __counted_by(count);
};
/* 64bytes hash in xattr_ntacl is computed with sha256 */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6549d0979b28..147073601716 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,6 +97,10 @@ extern const int mmap_rnd_compat_bits_max;
extern int mmap_rnd_compat_bits __read_mostly;
#endif
+#ifndef PHYSMEM_END
+# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1)
+#endif
+
#include <asm/page.h>
#include <asm/processor.h>
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index b0875b99e811..d94abba1c716 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -248,6 +248,7 @@ struct resctrl_schema {
/* The number of closid supported by this resource regardless of CDP */
u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
+u32 resctrl_arch_system_num_rmid_idx(void);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
/*
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 3d64290d24c9..3eedb8c226ad 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -752,7 +752,7 @@ static int kexec_calculate_store_digests(struct kimage *image)
#ifdef CONFIG_CRASH_HOTPLUG
/* Exclude elfcorehdr segment to allow future changes via hotplug */
- if (j == image->elfcorehdr_index)
+ if (i == image->elfcorehdr_index)
continue;
#endif
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 88d08eeb8bc0..fba1229f1de6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1644,6 +1644,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
}
static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ struct rt_mutex_base *lock,
struct rt_mutex_waiter *w)
{
/*
@@ -1656,10 +1657,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
if (build_ww_mutex() && w->ww_ctx)
return;
- /*
- * Yell loudly and stop the task right here.
- */
+ raw_spin_unlock_irq(&lock->wait_lock);
+
WARN(1, "rtmutex deadlock detected\n");
+
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
rt_mutex_schedule();
@@ -1713,7 +1714,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
} else {
__set_current_state(TASK_RUNNING);
remove_waiter(lock, waiter);
- rt_mutex_handle_deadlock(ret, chwalk, waiter);
+ rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
}
/*
diff --git a/kernel/resource.c b/kernel/resource.c
index 14777afb0a99..a83040fde236 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1826,8 +1826,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size,
if (flags & GFR_DESCENDING) {
resource_size_t end;
- end = min_t(resource_size_t, base->end,
- (1ULL << MAX_PHYSMEM_BITS) - 1);
+ end = min_t(resource_size_t, base->end, PHYSMEM_END);
return end - size + 1;
}
@@ -1844,8 +1843,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr,
* @size did not wrap 0.
*/
return addr > addr - size &&
- addr <= min_t(resource_size_t, base->end,
- (1ULL << MAX_PHYSMEM_BITS) - 1);
+ addr <= min_t(resource_size_t, base->end, PHYSMEM_END);
}
static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,
diff --git a/lib/codetag.c b/lib/codetag.c
index 5ace625f2328..afa8a2d4f317 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -125,7 +125,6 @@ static inline size_t range_size(const struct codetag_type *cttype,
cttype->desc.tag_size;
}
-#ifdef CONFIG_MODULES
static void *get_symbol(struct module *mod, const char *prefix, const char *name)
{
DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN);
@@ -155,6 +154,15 @@ static struct codetag_range get_section_range(struct module *mod,
};
}
+static const char *get_mod_name(__maybe_unused struct module *mod)
+{
+#ifdef CONFIG_MODULES
+ if (mod)
+ return mod->name;
+#endif
+ return "(built-in)";
+}
+
static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
{
struct codetag_range range;
@@ -164,8 +172,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
range = get_section_range(mod, cttype->desc.section);
if (!range.start || !range.stop) {
pr_warn("Failed to load code tags of type %s from the module %s\n",
- cttype->desc.section,
- mod ? mod->name : "(built-in)");
+ cttype->desc.section, get_mod_name(mod));
return -EINVAL;
}
@@ -199,6 +206,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
return 0;
}
+#ifdef CONFIG_MODULES
void codetag_load_module(struct module *mod)
{
struct codetag_type *cttype;
@@ -248,9 +256,6 @@ bool codetag_unload_module(struct module *mod)
return unload_ok;
}
-
-#else /* CONFIG_MODULES */
-static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; }
#endif /* CONFIG_MODULES */
struct codetag_type *
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index aa3a5df15b8e..6df3a8b95808 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -7566,14 +7566,14 @@ static void mt_validate_nulls(struct maple_tree *mt)
* 2. The gap is correctly set in the parents
*/
void mt_validate(struct maple_tree *mt)
+ __must_hold(mas->tree->ma_lock)
{
unsigned char end;
MA_STATE(mas, mt, 0, 0);
- rcu_read_lock();
mas_start(&mas);
if (!mas_is_active(&mas))
- goto done;
+ return;
while (!mte_is_leaf(mas.node))
mas_descend(&mas);
@@ -7594,9 +7594,6 @@ void mt_validate(struct maple_tree *mt)
mas_dfs_postorder(&mas, ULONG_MAX);
}
mt_validate_nulls(mt);
-done:
- rcu_read_unlock();
-
}
EXPORT_SYMBOL_GPL(mt_validate);
diff --git a/mm/filemap.c b/mm/filemap.c
index d62150418b91..0ca9c1377b68 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4231,7 +4231,7 @@ int filemap_invalidate_inode(struct inode *inode, bool flush,
}
/* Wait for writeback to complete on all folios and discard. */
- truncate_inode_pages_range(mapping, start, end);
+ invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE);
unlock:
filemap_invalidate_unlock(mapping);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f29157288b7d..d563fb515766 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3613,8 +3613,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
memcg1_soft_limit_reset(memcg);
#ifdef CONFIG_ZSWAP
memcg->zswap_max = PAGE_COUNTER_MAX;
- WRITE_ONCE(memcg->zswap_writeback,
- !parent || READ_ONCE(parent->zswap_writeback));
+ WRITE_ONCE(memcg->zswap_writeback, true);
#endif
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
if (parent) {
@@ -5320,7 +5319,14 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
- return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
+ if (!zswap_is_enabled())
+ return true;
+
+ for (; memcg; memcg = parent_mem_cgroup(memcg))
+ if (!READ_ONCE(memcg->zswap_writeback))
+ return false;
+
+ return true;
}
static u64 zswap_current_read(struct cgroup_subsys_state *css,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 66267c26ca1b..951878ab627a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1681,7 +1681,7 @@ struct range __weak arch_get_mappable_range(void)
struct range mhp_get_pluggable_range(bool need_mapping)
{
- const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
+ const u64 max_phys = PHYSMEM_END;
struct range mhp_range;
if (need_mapping) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c565de8f48e9..91ace8ca97e2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1054,6 +1054,13 @@ __always_inline bool free_pages_prepare(struct page *page,
reset_page_owner(page, order);
page_table_check_free(page, order);
pgalloc_tag_sub(page, 1 << order);
+
+ /*
+ * The page is isolated and accounted for.
+ * Mark the codetag as empty to avoid accounting error
+ * when the page is freed by unpoison_memory().
+ */
+ clear_page_tag_ref(page);
return false;
}
diff --git a/mm/slub.c b/mm/slub.c
index c9d8a2497fd6..a77f354f8325 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2116,6 +2116,10 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
if (!mem_alloc_profiling_enabled())
return;
+ /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
+ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
+ return;
+
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return;
diff --git a/mm/sparse.c b/mm/sparse.c
index 0f018c6f9ec5..dc38539f8560 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section)
static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn)
{
- unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+ unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT;
/*
* Sanity checks - do not allow an architecture to pass
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e54e5c8907fa..acc56c75ba99 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -787,27 +787,30 @@ retry:
}
dst_pmdval = pmdp_get_lockless(dst_pmd);
- /*
- * If the dst_pmd is mapped as THP don't
- * override it and just be strict.
- */
- if (unlikely(pmd_trans_huge(dst_pmdval))) {
- err = -EEXIST;
- break;
- }
if (unlikely(pmd_none(dst_pmdval)) &&
unlikely(__pte_alloc(dst_mm, dst_pmd))) {
err = -ENOMEM;
break;
}
- /* If an huge pmd materialized from under us fail */
- if (unlikely(pmd_trans_huge(*dst_pmd))) {
+ dst_pmdval = pmdp_get_lockless(dst_pmd);
+ /*
+ * If the dst_pmd is THP don't override it and just be strict.
+ * (This includes the case where the PMD used to be THP and
+ * changed back to none after __pte_alloc().)
+ */
+ if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) ||
+ pmd_devmap(dst_pmdval))) {
+ err = -EEXIST;
+ break;
+ }
+ if (unlikely(pmd_bad(dst_pmdval))) {
err = -EFAULT;
break;
}
-
- BUG_ON(pmd_none(*dst_pmd));
- BUG_ON(pmd_trans_huge(*dst_pmd));
+ /*
+ * For shmem mappings, khugepaged is allowed to remove page
+ * tables under us; pte_offset_map_lock() will deal with that.
+ */
err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
src_addr, flags, &folio);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index af2de36549d6..a0df1e2e155a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2191,6 +2191,7 @@ static void purge_vmap_node(struct work_struct *work)
{
struct vmap_node *vn = container_of(work,
struct vmap_node, purge_work);
+ unsigned long nr_purged_pages = 0;
struct vmap_area *va, *n_va;
LIST_HEAD(local_list);
@@ -2208,7 +2209,7 @@ static void purge_vmap_node(struct work_struct *work)
kasan_release_vmalloc(orig_start, orig_end,
va->va_start, va->va_end);
- atomic_long_sub(nr, &vmap_lazy_nr);
+ nr_purged_pages += nr;
vn->nr_purged++;
if (is_vn_id_valid(vn_id) && !vn->skip_populate)
@@ -2219,6 +2220,8 @@ static void purge_vmap_node(struct work_struct *work)
list_add(&va->list, &local_list);
}
+ atomic_long_sub(nr_purged_pages, &vmap_lazy_nr);
+
reclaim_list_global(&local_list);
}
@@ -2626,6 +2629,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
vb->dirty_max = 0;
bitmap_set(vb->used_map, 0, (1UL << order));
INIT_LIST_HEAD(&vb->free_list);
+ vb->cpu = raw_smp_processor_id();
xa = addr_to_vb_xa(va->va_start);
vb_idx = addr_to_vb_idx(va->va_start);
@@ -2642,7 +2646,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
* integrity together with list_for_each_rcu from read
* side.
*/
- vb->cpu = raw_smp_processor_id();
vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cfa839284b92..bd489c1af228 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1604,25 +1604,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
}
-#ifdef CONFIG_CMA
-/*
- * It is waste of effort to scan and reclaim CMA pages if it is not available
- * for current allocation context. Kswapd can not be enrolled as it can not
- * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
- */
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
- return !current_is_kswapd() &&
- gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
- folio_migratetype(folio) == MIGRATE_CMA;
-}
-#else
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
- return false;
-}
-#endif
-
/*
* Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
*
@@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
nr_pages = folio_nr_pages(folio);
total_scan += nr_pages;
- if (folio_zonenum(folio) > sc->reclaim_idx ||
- skip_cma(folio, sc)) {
+ if (folio_zonenum(folio) > sc->reclaim_idx) {
nr_skipped[folio_zonenum(folio)] += nr_pages;
move_to = &folios_skipped;
goto move;
@@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* ineligible */
- if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
+ if (zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
diff --git a/scripts/gfp-translate b/scripts/gfp-translate
index 6c9aed17cf56..8385ae0d5af9 100755
--- a/scripts/gfp-translate
+++ b/scripts/gfp-translate
@@ -62,25 +62,57 @@ if [ "$GFPMASK" = "none" ]; then
fi
# Extract GFP flags from the kernel source
-TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
-grep -q ___GFP $SOURCE/include/linux/gfp_types.h
-if [ $? -eq 0 ]; then
- grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
-else
- grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
-fi
+TMPFILE=`mktemp -t gfptranslate-XXXXXX.c` || exit 1
-# Parse the flags
-IFS="
-"
echo Source: $SOURCE
echo Parsing: $GFPMASK
-for LINE in `cat $TMPFILE`; do
- MASK=`echo $LINE | awk '{print $3}'`
- if [ $(($GFPMASK&$MASK)) -ne 0 ]; then
- echo $LINE
- fi
-done
-rm -f $TMPFILE
+(
+ cat <<EOF
+#include <stdint.h>
+#include <stdio.h>
+
+// Try to fool compiler.h into not including extra stuff
+#define __ASSEMBLY__ 1
+
+#include <generated/autoconf.h>
+#include <linux/gfp_types.h>
+
+static const char *masks[] = {
+EOF
+
+ sed -nEe 's/^[[:space:]]+(___GFP_.*)_BIT,.*$/\1/p' $SOURCE/include/linux/gfp_types.h |
+ while read b; do
+ cat <<EOF
+#if defined($b) && ($b > 0)
+ [${b}_BIT] = "$b",
+#endif
+EOF
+ done
+
+ cat <<EOF
+};
+
+int main(int argc, char *argv[])
+{
+ unsigned long long mask = $GFPMASK;
+
+ for (int i = 0; i < sizeof(mask) * 8; i++) {
+ unsigned long long bit = 1ULL << i;
+ if (mask & bit)
+ printf("\t%-25s0x%llx\n",
+ (i < ___GFP_LAST_BIT && masks[i]) ?
+ masks[i] : "*** INVALID ***",
+ bit);
+ }
+
+ return 0;
+}
+EOF
+) > $TMPFILE
+
+${CC:-gcc} -Wall -o ${TMPFILE}.bin -I $SOURCE/include $TMPFILE && ${TMPFILE}.bin
+
+rm -f $TMPFILE ${TMPFILE}.bin
+
exit 0
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index a818f010de47..bfcea5cf9a48 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static int sys_munmap(void *ptr, size_t size)
{
int sret;
@@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut)
{
void *ptr;
- ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
*ptrOut = ptr;
}
@@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut)
void *ptr;
unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
- ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
*ptrOut = ptr;
}
@@ -205,7 +194,7 @@ bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
@@ -481,8 +470,8 @@ static void test_seal_zero_address(void)
int prot;
/* use mmap to change protection. */
- ptr = sys_mmap(0, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ptr = mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
FAIL_TEST_IF_FALSE(ptr == 0);
size = get_vma_size(ptr, &prot);
@@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal)
}
/* use mmap to change protection. */
- ret2 = sys_mmap(ptr, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal)
}
/* use mmap to expand. */
- ret2 = sys_mmap(ptr, size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal)
}
/* use mmap to shrink. */
- ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
ret = fallocate(fd, 0, 0, size);
FAIL_TEST_IF_FALSE(!ret);
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
if (seal) {
@@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal)
int ret;
unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
if (seal) {
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
index 7aa1366063e4..d9f8ba8d5050 100644
--- a/tools/testing/selftests/mm/seal_elf.c
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len)
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
{
int sret;
@@ -56,7 +45,7 @@ static bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;