aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap2
-rw-r--r--Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst92
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/kernel/cpu/common.c9
-rw-r--r--arch/x86/kvm/x86.c43
-rw-r--r--drivers/of/of_reserved_mem.c3
-rw-r--r--drivers/platform/x86/intel/vsec.c9
-rw-r--r--fs/aio.c4
-rw-r--r--fs/dax.c5
-rw-r--r--fs/squashfs/xattr_id.c2
-rw-r--r--include/linux/mm.h12
-rw-r--r--include/linux/shrinker.h5
-rw-r--r--lib/parser.c39
-rw-r--r--mm/gup.c2
-rw-r--r--mm/kasan/common.c3
-rw-r--r--mm/kasan/generic.c7
-rw-r--r--mm/kasan/shadow.c12
-rw-r--r--mm/ksm.c7
-rw-r--r--mm/memory.c3
-rw-r--r--mm/shrinker_debug.c13
-rw-r--r--mm/swapfile.c20
-rw-r--r--mm/vmscan.c6
-rw-r--r--scripts/gdb/linux/cpus.py2
24 files changed, 244 insertions, 58 deletions
diff --git a/.mailmap b/.mailmap
index 318e63f338b1..a872c9683958 100644
--- a/.mailmap
+++ b/.mailmap
@@ -25,6 +25,8 @@ Aleksey Gorelov <[email protected]>
Alexander Lobakin <[email protected]> <[email protected]>
Alexander Lobakin <[email protected]> <[email protected]>
Alexander Lobakin <[email protected]> <[email protected]>
+Alexander Mikhalitsyn <[email protected]> <[email protected]>
+Alexander Mikhalitsyn <[email protected]> <[email protected]>
Alexandre Belloni <[email protected]> <[email protected]>
Alexei Starovoitov <[email protected]> <[email protected]>
Alexei Starovoitov <[email protected]> <[email protected]>
diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
new file mode 100644
index 000000000000..ec6e9f5bcf9e
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
@@ -0,0 +1,92 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+Cross-Thread Return Address Predictions
+=======================================
+
+Certain AMD and Hygon processors are subject to a cross-thread return address
+predictions vulnerability. When running in SMT mode and one sibling thread
+transitions out of C0 state, the other sibling thread could use return target
+predictions from the sibling thread that transitioned out of C0.
+
+The Spectre v2 mitigations protect the Linux kernel, as it fills the return
+address prediction entries with safe targets when context switching to the idle
+thread. However, KVM does allow a VMM to prevent exiting guest mode when
+transitioning out of C0. This could result in a guest-controlled return target
+being consumed by the sibling thread.
+
+Affected processors
+-------------------
+
+The following CPUs are vulnerable:
+
+ - AMD Family 17h processors
+ - Hygon Family 18h processors
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =======================================
+ CVE-2022-27672 Cross-Thread Return Address Predictions
+ ============== =======================================
+
+Problem
+-------
+
+Affected SMT-capable processors support 1T and 2T modes of execution when SMT
+is enabled. In 2T mode, both threads in a core are executing code. For the
+processor core to enter 1T mode, it is required that one of the threads
+requests to transition out of the C0 state. This can be communicated with the
+HLT instruction or with an MWAIT instruction that requests non-C0.
+When the thread re-enters the C0 state, the processor transitions back
+to 2T mode, assuming the other thread is also still in C0 state.
+
+In affected processors, the return address predictor (RAP) is partitioned
+depending on the SMT mode. For instance, in 2T mode each thread uses a private
+16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
+transition between 1T/2T mode, the RAP contents are not modified but the RAP
+pointers (which control the next return target to use for predictions) may
+change. This behavior may result in return targets from one SMT thread being
+used by RET predictions in the sibling thread following a 1T/2T switch. In
+particular, a RET instruction executed immediately after a transition to 1T may
+use a return target from the thread that just became idle. In theory, this
+could lead to information disclosure if the return targets used do not come
+from trustworthy code.
+
+Attack scenarios
+----------------
+
+An attack can be mounted on affected processors by performing a series of CALL
+instructions with targeted return locations and then transitioning out of C0
+state.
+
+Mitigation mechanism
+--------------------
+
+Before entering idle state, the kernel context switches to the idle thread. The
+context switch fills the RAP entries (referred to as the RSB in Linux) with safe
+targets by performing a sequence of CALL instructions.
+
+Prevent a guest VM from directly putting the processor into an idle state by
+intercepting HLT and MWAIT instructions.
+
+Both mitigations are required to fully address this issue.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+Use existing Spectre v2 mitigations that will fill the RSB on context switch.
+
+Mitigation control for KVM - module parameter
+---------------------------------------------
+
+By default, the KVM hypervisor mitigates this issue by intercepting guest
+attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
+capability to override those interceptions, but since this is not common, the
+mitigation that covers this path is not enabled by default.
+
+The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
+using the boolean module parameter mitigate_smt_rsb, e.g.:
+ kvm.mitigate_smt_rsb=1
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 4df436e7c417..e0614760a99e 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -18,3 +18,4 @@ are configurable at compile, boot or run time.
core-scheduling.rst
l1d_flush.rst
processor_mmio_stale_data.rst
+ cross-thread-rsb.rst
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 61012476d66e..8f39c46197b8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -466,5 +466,6 @@
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9cfca3d7d0e2..f3cc7699e1e1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1256,6 +1256,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define MMIO_SBDS BIT(2)
/* CPU is affected by RETbleed, speculating where you would not expect it */
#define RETBLEED BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB BIT(4)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1287,8 +1289,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED),
- VULNBL_HYGON(0x18, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
{}
};
@@ -1406,6 +1408,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+ if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
+ setup_force_cpu_bug(X86_BUG_SMT_RSB);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index da4bbd043a7b..f0fa3de2ceb8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -191,6 +191,10 @@ module_param(enable_pmu, bool, 0444);
bool __read_mostly eager_page_split = true;
module_param(eager_page_split, bool, 0644);
+/* Enable/disable SMT_RSB bug mitigation */
+bool __read_mostly mitigate_smt_rsb;
+module_param(mitigate_smt_rsb, bool, 0444);
+
/*
* Restoring the host value for MSRs that are only consumed when running in
* usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -4448,10 +4452,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
- KVM_X86_DISABLE_EXITS_CSTATE;
- if(kvm_can_mwait_in_guest())
- r |= KVM_X86_DISABLE_EXITS_MWAIT;
+ r = KVM_X86_DISABLE_EXITS_PAUSE;
+
+ if (!mitigate_smt_rsb) {
+ r |= KVM_X86_DISABLE_EXITS_HLT |
+ KVM_X86_DISABLE_EXITS_CSTATE;
+
+ if (kvm_can_mwait_in_guest())
+ r |= KVM_X86_DISABLE_EXITS_MWAIT;
+ }
break;
case KVM_CAP_X86_SMM:
if (!IS_ENABLED(CONFIG_KVM_SMM))
@@ -6227,15 +6236,26 @@ split_irqchip_unlock:
if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
break;
- if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
- kvm_can_mwait_in_guest())
- kvm->arch.mwait_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
- kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
- kvm->arch.cstate_in_guest = true;
+
+#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \
+ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests."
+
+ if (!mitigate_smt_rsb) {
+ if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() &&
+ (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+ pr_warn_once(SMT_RSB_MSG);
+
+ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+ kvm_can_mwait_in_guest())
+ kvm->arch.mwait_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
+ kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+ kvm->arch.cstate_in_guest = true;
+ }
+
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@@ -13456,6 +13476,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
static int __init kvm_x86_init(void)
{
kvm_mmu_x86_module_init();
+ mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible();
return 0;
}
module_init(kvm_x86_init);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 65f3b02a0e4e..f90975e00446 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
err = memblock_mark_nomap(base, size);
if (err)
memblock_phys_free(base, size);
- kmemleak_ignore_phys(base);
}
+ kmemleak_ignore_phys(base);
+
return err;
}
diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c
index bb81b8b1f7e9..89c5374e33b3 100644
--- a/drivers/platform/x86/intel/vsec.c
+++ b/drivers/platform/x86/intel/vsec.c
@@ -408,14 +408,23 @@ static const struct intel_vsec_platform_info dg1_info = {
.quirks = VSEC_QUIRK_NO_DVSEC | VSEC_QUIRK_EARLY_HW,
};
+/* MTL info */
+static const struct intel_vsec_platform_info mtl_info = {
+ .quirks = VSEC_QUIRK_NO_WATCHER | VSEC_QUIRK_NO_CRASHLOG,
+};
+
#define PCI_DEVICE_ID_INTEL_VSEC_ADL 0x467d
#define PCI_DEVICE_ID_INTEL_VSEC_DG1 0x490e
+#define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d
+#define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 0xad0d
#define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7
#define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d
#define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d
static const struct pci_device_id intel_vsec_pci_ids[] = {
{ PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) },
+ { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) },
+ { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &(struct intel_vsec_platform_info) {}) },
{ PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) },
{ PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) },
diff --git a/fs/aio.c b/fs/aio.c
index 562916d85cba..e85ba0b77f59 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
+ if (!table)
+ goto out_unlock;
+
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
@@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
}
}
+out_unlock:
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
return res;
diff --git a/fs/dax.c b/fs/dax.c
index c48a3a93ab29..3e457a16c7d1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
if (ret < 0)
goto out_unlock;
- ret = copy_mc_to_kernel(daddr, saddr, length);
- if (ret)
+ if (copy_mc_to_kernel(daddr, saddr, length) == 0)
+ ret = length;
+ else
ret = -EIO;
out_unlock:
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index b88d19e9581e..c8469c656e0d 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
/* Sanity check values */
/* there is always at least one xattr id */
- if (*xattr_ids <= 0)
+ if (*xattr_ids == 0)
return ERR_PTR(-EINVAL);
len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f857163ac89..f13f20258ce9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -137,7 +137,7 @@ extern int mmap_rnd_compat_bits __read_mostly;
* define their own version of this macro in <asm/pgtable.h>
*/
#if BITS_PER_LONG == 64
-/* This function must be updated when the size of struct page grows above 80
+/* This function must be updated when the size of struct page grows above 96
* or reduces below 56. The idea that compiler optimizes out switch()
* statement, and only leaves move/store instructions. Also the compiler can
* combine write statements if they are both assignments and can be reordered,
@@ -148,12 +148,18 @@ static inline void __mm_zero_struct_page(struct page *page)
{
unsigned long *_pp = (void *)page;
- /* Check that struct page is either 56, 64, 72, or 80 bytes */
+ /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */
BUILD_BUG_ON(sizeof(struct page) & 7);
BUILD_BUG_ON(sizeof(struct page) < 56);
- BUILD_BUG_ON(sizeof(struct page) > 80);
+ BUILD_BUG_ON(sizeof(struct page) > 96);
switch (sizeof(struct page)) {
+ case 96:
+ _pp[11] = 0;
+ fallthrough;
+ case 88:
+ _pp[10] = 0;
+ fallthrough;
case 80:
_pp[9] = 0;
fallthrough;
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 71310efe2fab..7bde8e1c228a 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -107,7 +107,7 @@ extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern void shrinker_debugfs_remove(struct shrinker *shrinker);
+extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
@@ -115,8 +115,9 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
-static inline void shrinker_debugfs_remove(struct shrinker *shrinker)
+static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
+ return NULL;
}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
diff --git a/lib/parser.c b/lib/parser.c
index bcb23484100e..2b5e2b480253 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -11,6 +11,15 @@
#include <linux/slab.h>
#include <linux/string.h>
+/*
+ * max size needed by different bases to express U64
+ * HEX: "0xFFFFFFFFFFFFFFFF" --> 18
+ * DEC: "18446744073709551615" --> 20
+ * OCT: "01777777777777777777777" --> 23
+ * pick the max one to define NUMBER_BUF_LEN
+ */
+#define NUMBER_BUF_LEN 24
+
/**
* match_one - Determines if a string matches a simple pattern
* @s: the string to examine for presence of the pattern
@@ -129,14 +138,12 @@ EXPORT_SYMBOL(match_token);
static int match_number(substring_t *s, int *result, int base)
{
char *endp;
- char *buf;
+ char buf[NUMBER_BUF_LEN];
int ret;
long val;
- buf = match_strdup(s);
- if (!buf)
- return -ENOMEM;
-
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
ret = 0;
val = simple_strtol(buf, &endp, base);
if (endp == buf)
@@ -145,7 +152,6 @@ static int match_number(substring_t *s, int *result, int base)
ret = -ERANGE;
else
*result = (int) val;
- kfree(buf);
return ret;
}
@@ -163,18 +169,15 @@ static int match_number(substring_t *s, int *result, int base)
*/
static int match_u64int(substring_t *s, u64 *result, int base)
{
- char *buf;
+ char buf[NUMBER_BUF_LEN];
int ret;
u64 val;
- buf = match_strdup(s);
- if (!buf)
- return -ENOMEM;
-
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
ret = kstrtoull(buf, base, &val);
if (!ret)
*result = val;
- kfree(buf);
return ret;
}
@@ -206,14 +209,12 @@ EXPORT_SYMBOL(match_int);
*/
int match_uint(substring_t *s, unsigned int *result)
{
- int err = -ENOMEM;
- char *buf = match_strdup(s);
+ char buf[NUMBER_BUF_LEN];
- if (buf) {
- err = kstrtouint(buf, 10, result);
- kfree(buf);
- }
- return err;
+ if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN)
+ return -ERANGE;
+
+ return kstrtouint(buf, 10, result);
}
EXPORT_SYMBOL(match_uint);
diff --git a/mm/gup.c b/mm/gup.c
index f45a3a5be53a..7c034514ddd8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1914,7 +1914,7 @@ static unsigned long collect_longterm_unpinnable_pages(
drain_allow = false;
}
- if (!folio_isolate_lru(folio))
+ if (folio_isolate_lru(folio))
continue;
list_add_tail(&folio->lru, movable_page_list);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 833bf2cfd2a3..21e66d7f261d 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -246,6 +246,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip)
{
+ if (!kasan_arch_is_ready())
+ return false;
+
if (ptr != page_address(virt_to_head_page(ptr))) {
kasan_report_invalid_free(ptr, ip, KASAN_REPORT_INVALID_FREE);
return true;
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index b076f597a378..cb762982c8ba 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -191,7 +191,12 @@ bool kasan_check_range(unsigned long addr, size_t size, bool write,
bool kasan_byte_accessible(const void *addr)
{
- s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
+ s8 shadow_byte;
+
+ if (!kasan_arch_is_ready())
+ return true;
+
+ shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
return shadow_byte >= 0 && shadow_byte < KASAN_GRANULE_SIZE;
}
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 2fba1f51f042..15cfb34d16a1 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -291,6 +291,9 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
unsigned long shadow_start, shadow_end;
int ret;
+ if (!kasan_arch_is_ready())
+ return 0;
+
if (!is_vmalloc_or_module_addr((void *)addr))
return 0;
@@ -459,6 +462,9 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long region_start, region_end;
unsigned long size;
+ if (!kasan_arch_is_ready())
+ return;
+
region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);
@@ -502,6 +508,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
* with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored.
*/
+ if (!kasan_arch_is_ready())
+ return (void *)start;
+
if (!is_vmalloc_or_module_addr(start))
return (void *)start;
@@ -524,6 +533,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
*/
void __kasan_poison_vmalloc(const void *start, unsigned long size)
{
+ if (!kasan_arch_is_ready())
+ return;
+
if (!is_vmalloc_or_module_addr(start))
return;
diff --git a/mm/ksm.c b/mm/ksm.c
index dd02780c387f..addf490da146 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2629,8 +2629,11 @@ struct page *ksm_might_need_to_copy(struct page *page,
new_page = NULL;
}
if (new_page) {
- copy_user_highpage(new_page, page, address, vma);
-
+ if (copy_mc_user_highpage(new_page, page, address, vma)) {
+ put_page(new_page);
+ memory_failure_queue(page_to_pfn(page), 0);
+ return ERR_PTR(-EHWPOISON);
+ }
SetPageDirty(new_page);
__SetPageUptodate(new_page);
__SetPageLocked(new_page);
diff --git a/mm/memory.c b/mm/memory.c
index 3e836fecd035..f526b9152bef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!page)) {
ret = VM_FAULT_OOM;
goto out_page;
+ } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
+ ret = VM_FAULT_HWPOISON;
+ goto out_page;
}
folio = page_folio(page);
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index b05295bab322..39c3491e28a3 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -246,18 +246,21 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
}
EXPORT_SYMBOL(shrinker_debugfs_rename);
-void shrinker_debugfs_remove(struct shrinker *shrinker)
+struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
+ struct dentry *entry = shrinker->debugfs_entry;
+
lockdep_assert_held(&shrinker_rwsem);
kfree_const(shrinker->name);
shrinker->name = NULL;
- if (!shrinker->debugfs_entry)
- return;
+ if (entry) {
+ ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
+ shrinker->debugfs_entry = NULL;
+ }
- debugfs_remove_recursive(shrinker->debugfs_entry);
- ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
+ return entry;
}
static int __init shrinker_debugfs_init(void)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4fa440e87cd6..eb9b0bf1fcdd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1764,12 +1764,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte;
+ bool hwposioned = false;
int ret = 1;
swapcache = page;
page = ksm_might_need_to_copy(page, vma, addr);
if (unlikely(!page))
return -ENOMEM;
+ else if (unlikely(PTR_ERR(page) == -EHWPOISON))
+ hwposioned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
@@ -1777,15 +1780,19 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
goto out;
}
- if (unlikely(!PageUptodate(page))) {
- pte_t pteval;
+ if (unlikely(hwposioned || !PageUptodate(page))) {
+ swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
- pteval = swp_entry_to_pte(make_swapin_error_entry());
- set_pte_at(vma->vm_mm, addr, pte, pteval);
- swap_free(entry);
+ if (hwposioned) {
+ swp_entry = make_hwpoison_entry(swapcache);
+ page = swapcache;
+ } else {
+ swp_entry = make_swapin_error_entry();
+ }
+ new_pte = swp_entry_to_pte(swp_entry);
ret = 0;
- goto out;
+ goto setpte;
}
/* See do_swap_page() */
@@ -1817,6 +1824,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
new_pte = pte_mksoft_dirty(new_pte);
if (pte_swp_uffd_wp(*pte))
new_pte = pte_mkuffd_wp(new_pte);
+setpte:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
swap_free(entry);
out:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bf3eedf0209c..5b7b8d4f5297 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -741,6 +741,8 @@ EXPORT_SYMBOL(register_shrinker);
*/
void unregister_shrinker(struct shrinker *shrinker)
{
+ struct dentry *debugfs_entry;
+
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
@@ -749,9 +751,11 @@ void unregister_shrinker(struct shrinker *shrinker)
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
- shrinker_debugfs_remove(shrinker);
+ debugfs_entry = shrinker_debugfs_remove(shrinker);
up_write(&shrinker_rwsem);
+ debugfs_remove_recursive(debugfs_entry);
+
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;
}
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index 15fc4626d236..9ee99f9fae8d 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -163,7 +163,7 @@ def get_current_task(cpu):
task_ptr_type = task_type.get_type().pointer()
if utils.is_target_arch("x86"):
- var_ptr = gdb.parse_and_eval("&current_task")
+ var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
return per_cpu(var_ptr, cpu).dereference()
elif utils.is_target_arch("aarch64"):
current_task_addr = gdb.parse_and_eval("$SP_EL0")