aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/vm/slub.rst10
-rw-r--r--Makefile5
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/mmu/mmu.c26
-rw-r--r--arch/x86/kvm/svm/avic.c6
-rw-r--r--arch/x86/kvm/svm/sev.c20
-rw-r--r--arch/x86/kvm/vmx/vmx.c1
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c3
-rw-r--r--drivers/dma/idxd/cdev.c1
-rw-r--r--drivers/dma/idxd/init.c63
-rw-r--r--drivers/dma/ipu/ipu_irq.c2
-rw-r--r--drivers/dma/mediatek/mtk-uart-apdma.c27
-rw-r--r--drivers/dma/pl330.c6
-rw-r--r--drivers/dma/qcom/Kconfig1
-rw-r--r--drivers/dma/sf-pdma/Kconfig1
-rw-r--r--drivers/dma/sh/rcar-dmac.c2
-rw-r--r--drivers/dma/ste_dma40.c3
-rw-r--r--drivers/dma/stm32-mdma.c4
-rw-r--r--drivers/dma/xilinx/xilinx_dpdma.c31
-rw-r--r--drivers/dma/xilinx/zynqmp_dma.c2
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/notify/fanotify/fanotify_user.c4
-rw-r--r--fs/proc/base.c4
-rw-r--r--include/linux/huge_mm.h8
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/rmap.h1
-rw-r--r--include/linux/swapops.h15
-rw-r--r--include/uapi/asm-generic/unistd.h3
-rw-r--r--kernel/crash_core.c1
-rw-r--r--mm/huge_memory.c56
-rw-r--r--mm/hugetlb.c135
-rw-r--r--mm/internal.h53
-rw-r--r--mm/memory-failure.c36
-rw-r--r--mm/memory.c41
-rw-r--r--mm/migrate.c1
-rw-r--r--mm/page_vma_mapped.c27
-rw-r--r--mm/pgtable-generic.c5
-rw-r--r--mm/rmap.c39
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slub.c37
-rw-r--r--mm/sparse.c13
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c43
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c38
50 files changed, 593 insertions, 216 deletions
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index 03f294a638bd..d3028554b1e9 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -181,7 +181,7 @@ SLUB Debug output
Here is a sample of slub debug output::
====================================================================
- BUG kmalloc-8: Redzone overwritten
+ BUG kmalloc-8: Right Redzone overwritten
--------------------------------------------------------------------
INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
@@ -189,10 +189,10 @@ Here is a sample of slub debug output::
INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
- Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
- Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005
- Redzone 0xc90f6d28: 00 cc cc cc .
- Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005
+ Redzone (0xc90f6d28): 00 cc cc cc .
+ Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
[<c010523d>] dump_trace+0x63/0x1eb
[<c01053df>] show_trace_log_lvl+0x1a/0x2f
diff --git a/Makefile b/Makefile
index ed669b2d705d..2d7a8df84e2b 100644
--- a/Makefile
+++ b/Makefile
@@ -929,11 +929,14 @@ CC_FLAGS_LTO += -fvisibility=hidden
# Limit inlining across translation units to reduce binary size
KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
-# Check for frame size exceeding threshold during prolog/epilog insertion.
+# Check for frame size exceeding threshold during prolog/epilog insertion
+# when using lld < 13.0.0.
ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
endif
endif
+endif
ifdef CONFIG_LTO
KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9a48f138832d..b4da665bb892 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
entry->ecx = F(RDPID);
++array->nent;
+ break;
default:
break;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6d72d8f43310..17fa4ab1b834 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+ if (alignment + len > 4)
+ return 1;
+
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0144c40d09c7..8d5876dfc6b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+ /*
+ * Nested MMUs are used only for walking L2's gva->gpa, they never have
+ * shadow pages of their own and so "direct" has no meaning. Set it
+ * to "true" to try to detect bogus usage of the nested MMU.
+ */
+ role.base.direct = true;
+
+ if (!is_paging(vcpu))
+ role.base.level = 0;
+ else if (is_long_mode(vcpu))
+ role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+ PT64_ROOT_4LEVEL;
+ else if (is_pae(vcpu))
+ role.base.level = PT32E_ROOT_LEVEL;
+ else
+ role.base.level = PT32_ROOT_LEVEL;
+
+ return role;
+}
+
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
- union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+ union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0e62e6a2438c..5e7e920113f3 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
return &avic_physical_id_table[index];
}
-/**
+/*
* Note:
* AVIC hardware walks the nested page table to check permissions,
* but does not use the SPA address specified in the leaf page
@@ -764,7 +764,7 @@ out:
return ret;
}
-/**
+/*
* Note:
* The HW cannot support posting multicast/broadcast
* interrupts to a vCPU. So, we still use legacy interrupt
@@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/**
+/*
* This function is called during VCPU halt/unhalt.
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e0ce5da97fc2..8d36f0c73071 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
sev->misc_cg = NULL;
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission decommission;
+
+ if (!handle)
+ return;
+
+ decommission.handle = handle;
+ sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate deactivate;
if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_deactivate(&deactivate, NULL);
up_read(&sev_deactivate_lock);
- /* decommission handle */
- decommission.handle = handle;
- sev_guest_decommission(&decommission, NULL);
+ sev_decommission(handle);
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start.handle;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50b42d7a8a11..c2a779b688e6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
switch (kvm_get_apic_mode(vcpu)) {
case LAPIC_MODE_INVALID:
WARN_ONCE(true, "Invalid local APIC state");
+ break;
case LAPIC_MODE_DISABLED:
break;
case LAPIC_MODE_XAPIC:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d3955a6a763..e0f4a46649d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ vcpu->arch.hflags = emul_flags;
+ kvm_mmu_reset_context(vcpu);
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -8258,6 +8261,7 @@ void kvm_arch_exit(void)
kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
+ kmem_cache_destroy(x86_emulator_cache);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6ab9d9a488a6..39b5b46e880f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -59,6 +59,7 @@ config DMA_OF
#devices
config ALTERA_MSGDMA
tristate "Altera / Intel mSGDMA Engine"
+ depends on HAS_IOMEM
select DMA_ENGINE
help
Enable support for Altera / Intel mSGDMA controller.
@@ -701,6 +702,7 @@ config XILINX_ZYNQMP_DMA
config XILINX_ZYNQMP_DPDMA
tristate "Xilinx DPDMA Engine"
+ depends on HAS_IOMEM && OF
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
index 4ec909e0b810..4ae057922ef1 100644
--- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
}
if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+ err = -EINVAL;
dev_err(dev, "DPDMAI major version mismatch\n"
"Found %u.%u, supported version is %u.%u\n",
priv->dpdmai_attr.version.major,
@@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
}
if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+ err = -EINVAL;
dev_err(dev, "DPDMAI minor version mismatch\n"
"Found %u.%u, supported version is %u.%u\n",
priv->dpdmai_attr.version.major,
@@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
ppriv->store =
dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
if (!ppriv->store) {
+ err = -ENOMEM;
dev_err(dev, "dpaa2_io_store_create() failed\n");
goto err_store;
}
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 302cba5ff779..d4419bf1fede 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
pasid = iommu_sva_get_pasid(sva);
if (pasid == IOMMU_PASID_INVALID) {
iommu_sva_unbind_device(sva);
+ rc = -EINVAL;
goto failed;
}
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 776fd44aff5f..442d55c11a5f 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
return rc;
}
+static void idxd_cleanup_interrupts(struct idxd_device *idxd)
+{
+ struct pci_dev *pdev = idxd->pdev;
+ struct idxd_irq_entry *irq_entry;
+ int i, msixcnt;
+
+ msixcnt = pci_msix_vec_count(pdev);
+ if (msixcnt <= 0)
+ return;
+
+ irq_entry = &idxd->irq_entries[0];
+ free_irq(irq_entry->vector, irq_entry);
+
+ for (i = 1; i < msixcnt; i++) {
+
+ irq_entry = &idxd->irq_entries[i];
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
+ idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+ IDXD_IRQ_MSIX);
+ free_irq(irq_entry->vector, irq_entry);
+ }
+
+ idxd_mask_error_interrupts(idxd);
+ pci_free_irq_vectors(pdev);
+}
+
static int idxd_setup_wqs(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
@@ -242,6 +268,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
engine->idxd = idxd;
device_initialize(&engine->conf_dev);
engine->conf_dev.parent = &idxd->conf_dev;
+ engine->conf_dev.bus = &dsa_bus_type;
engine->conf_dev.type = &idxd_engine_device_type;
rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
if (rc < 0) {
@@ -303,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd)
return rc;
}
+static void idxd_cleanup_internals(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_groups; i++)
+ put_device(&idxd->groups[i]->conf_dev);
+ for (i = 0; i < idxd->max_engines; i++)
+ put_device(&idxd->engines[i]->conf_dev);
+ for (i = 0; i < idxd->max_wqs; i++)
+ put_device(&idxd->wqs[i]->conf_dev);
+ destroy_workqueue(idxd->wq);
+}
+
static int idxd_setup_internals(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
@@ -531,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd)
dev_dbg(dev, "Loading RO device config\n");
rc = idxd_device_load_config(idxd);
if (rc < 0)
- goto err;
+ goto err_config;
}
rc = idxd_setup_interrupts(idxd);
if (rc)
- goto err;
+ goto err_config;
dev_dbg(dev, "IDXD interrupt setup complete.\n");
@@ -549,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd)
dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
return 0;
+ err_config:
+ idxd_cleanup_internals(idxd);
err:
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
@@ -556,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd)
return rc;
}
+static void idxd_cleanup(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+
+ perfmon_pmu_remove(idxd);
+ idxd_cleanup_interrupts(idxd);
+ idxd_cleanup_internals(idxd);
+ if (device_pasid_enabled(idxd))
+ idxd_disable_system_pasid(idxd);
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+}
+
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
@@ -608,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = idxd_register_devices(idxd);
if (rc) {
dev_err(dev, "IDXD sysfs setup failed\n");
- goto err;
+ goto err_dev_register;
}
idxd->state = IDXD_DEV_CONF_READY;
@@ -618,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
+ err_dev_register:
+ idxd_cleanup(idxd);
err:
pci_iounmap(pdev, idxd->reg_base);
err_iomap:
@@ -787,6 +843,7 @@ module_init(idxd_init_module);
static void __exit idxd_exit_module(void)
{
+ idxd_unregister_driver();
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
idxd_unregister_bus_type();
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index 0d5c42f7bfa4..97d9a6f04f2a 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -230,7 +230,7 @@ out:
}
/**
- * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * ipu_irq_unmap() - unmap an IPU interrupt source
* @source: interrupt source bit position (see ipu_irq_map())
* @return: 0 or negative error code
*/
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index 27c07350971d..375e7e647df6 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
{
- struct dma_chan *chan = vd->tx.chan;
- struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
-
- kfree(c->desc);
+ kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
}
static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
{
- struct mtk_uart_apdma_desc *d = c->desc;
-
mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
-
- list_del(&d->vd.node);
- vchan_cookie_complete(&d->vd);
}
static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
c->rx_status = d->avail_len - cnt;
mtk_uart_apdma_write(c, VFF_RPT, wg);
+}
- list_del(&d->vd.node);
- vchan_cookie_complete(&d->vd);
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+ struct mtk_uart_apdma_desc *d = c->desc;
+
+ if (d) {
+ list_del(&d->vd.node);
+ vchan_cookie_complete(&d->vd);
+ c->desc = NULL;
+ }
}
static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
mtk_uart_apdma_rx_handler(c);
else if (c->dir == DMA_MEM_TO_DEV)
mtk_uart_apdma_tx_handler(c);
+ mtk_uart_apdma_chan_complete_handler(c);
spin_unlock_irqrestore(&c->vc.lock, flags);
return IRQ_HANDLED;
@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
return NULL;
/* Now allocate and setup the descriptor */
- d = kzalloc(sizeof(*d), GFP_ATOMIC);
+ d = kzalloc(sizeof(*d), GFP_NOWAIT);
if (!d)
return NULL;
@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
unsigned long flags;
spin_lock_irqsave(&c->vc.lock, flags);
- if (vchan_issue_pending(&c->vc)) {
+ if (vchan_issue_pending(&c->vc) && !c->desc) {
vd = vchan_next_desc(&c->vc);
c->desc = to_mtk_uart_apdma_desc(&vd->tx);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index fd8d2bc3be9f..110de8a60058 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
for (i = 0; i < len / period_len; i++) {
desc = pl330_get_desc(pch);
if (!desc) {
+ unsigned long iflags;
+
dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
__func__, __LINE__);
if (!first)
return NULL;
- spin_lock_irqsave(&pl330->pool_lock, flags);
+ spin_lock_irqsave(&pl330->pool_lock, iflags);
while (!list_empty(&first->node)) {
desc = list_entry(first->node.next,
@@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
list_move_tail(&first->node, &pl330->desc_pool);
- spin_unlock_irqrestore(&pl330->pool_lock, flags);
+ spin_unlock_irqrestore(&pl330->pool_lock, iflags);
return NULL;
}
diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig
index 365f94eb3b08..3f926a653bd8 100644
--- a/drivers/dma/qcom/Kconfig
+++ b/drivers/dma/qcom/Kconfig
@@ -33,6 +33,7 @@ config QCOM_GPI_DMA
config QCOM_HIDMA_MGMT
tristate "Qualcomm Technologies HIDMA Management support"
+ depends on HAS_IOMEM
select DMA_ENGINE
help
Enable support for the Qualcomm Technologies HIDMA Management.
diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig
index f8ffa02e279f..ba46a0a15a93 100644
--- a/drivers/dma/sf-pdma/Kconfig
+++ b/drivers/dma/sf-pdma/Kconfig
@@ -1,5 +1,6 @@
config SF_PDMA
tristate "Sifive PDMA controller driver"
+ depends on HAS_IOMEM
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index d530c1bf11d9..6885b3dcd7a9 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
/* Enable runtime PM and initialize the device. */
pm_runtime_enable(&pdev->dev);
- ret = pm_runtime_get_sync(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
return ret;
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 265d7c07b348..e1827393143f 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -3675,6 +3675,9 @@ static int __init d40_probe(struct platform_device *pdev)
kfree(base->lcla_pool.base_unaligned);
+ if (base->lcpa_base)
+ iounmap(base->lcpa_base);
+
if (base->phy_lcpa)
release_mem_region(base->phy_lcpa,
base->lcpa_size);
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 36ba8b43e78d..18cbd1e43c2e 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
return -ENOMEM;
}
- ret = pm_runtime_get_sync(dmadev->ddev.dev);
+ ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
if (ret < 0)
return ret;
@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev)
u32 ccr, id;
int ret;
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0)
return ret;
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index 70b29bd079c9..6c709803203a 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -113,6 +113,7 @@
#define XILINX_DPDMA_CH_VDO 0x020
#define XILINX_DPDMA_CH_PYLD_SZ 0x024
#define XILINX_DPDMA_CH_DESC_ID 0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0)
/* DPDMA descriptor fields */
#define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5
@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
* will be used, but it should be enough.
*/
list_for_each_entry(sw_desc, &desc->descriptors, node)
- sw_desc->hw.desc_id = desc->vdesc.tx.cookie;
+ sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+ & XILINX_DPDMA_CH_DESC_ID_MASK;
sw_desc = list_first_entry(&desc->descriptors,
struct xilinx_dpdma_sw_desc, node);
@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan)
if (!chan->running || !pending)
goto out;
- desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID);
+ desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+ & XILINX_DPDMA_CH_DESC_ID_MASK;
/* If the retrigger raced with vsync, retry at the next frame. */
sw_desc = list_first_entry(&pending->descriptors,
@@ -1459,7 +1462,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
*/
static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
{
- dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL);
+ dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
}
@@ -1596,6 +1599,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
}
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+ unsigned int i;
+ void __iomem *reg;
+
+ /* Disable all interrupts */
+ xilinx_dpdma_disable_irq(xdev);
+
+ /* Stop all channels */
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+ reg = xdev->reg + XILINX_DPDMA_CH_BASE
+ + XILINX_DPDMA_CH_OFFSET * i;
+ dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+ }
+
+ /* Clear the interrupt status registers */
+ dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
static int xilinx_dpdma_probe(struct platform_device *pdev)
{
struct xilinx_dpdma_device *xdev;
@@ -1622,6 +1645,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev)
if (IS_ERR(xdev->reg))
return PTR_ERR(xdev->reg);
+ dpdma_hw_init(xdev);
+
xdev->irq = platform_get_irq(pdev, 0);
if (xdev->irq < 0) {
dev_err(xdev->dev, "failed to get platform irq\n");
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index d8419565b92c..5fecf5aa6e85 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
struct zynqmp_dma_desc_sw *desc;
int i, ret;
- ret = pm_runtime_get_sync(chan->dev);
+ ret = pm_runtime_resume_and_get(chan->dev);
if (ret < 0)
return ret;
diff --git a/fs/afs/main.c b/fs/afs/main.c
index b2975256dadb..179004b15566 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -203,8 +203,8 @@ static int __init afs_init(void)
goto error_fs;
afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
- if (IS_ERR(afs_proc_symlink)) {
- ret = PTR_ERR(afs_proc_symlink);
+ if (!afs_proc_symlink) {
+ ret = -ENOMEM;
goto error_proc;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 55efd3dd04f6..30dee68458c7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
__SetPageUptodate(page);
error = huge_add_to_page_cache(page, mapping, index);
if (unlikely(error)) {
+ restore_reserve_on_error(h, &pseudo_vma, addr, page);
put_page(page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
goto out;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index be5b6d2c01e7..64864fb40b40 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -471,7 +471,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
info_type, fanotify_info_name(info),
info->name_len, buf, count);
if (ret < 0)
- return ret;
+ goto out_close_fd;
buf += ret;
count -= ret;
@@ -519,7 +519,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
fanotify_event_object_fh(event),
info_type, dot, dot_len, buf, count);
if (ret < 0)
- return ret;
+ goto out_close_fd;
buf += ret;
count -= ret;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7118ebe38fa6..9cbd915025ad 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2676,7 +2676,9 @@ out:
#ifdef CONFIG_SECURITY
static int proc_pid_attr_open(struct inode *inode, struct file *file)
{
- return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+ file->private_data = NULL;
+ __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+ return 0;
}
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9626fda5efce..2a8ebe6c222e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
static inline bool is_huge_zero_page(struct page *page)
{
@@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(struct page *page)
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return is_huge_zero_page(pmd_page(pmd));
+ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
}
static inline bool is_huge_zero_pud(pud_t pud)
@@ -440,6 +441,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b92f25ccef58..6504346a1947 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
void putback_active_hugepage(struct page *page);
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
return false;
}
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ return 0;
+}
+
static inline void putback_active_hugepage(struct page *page)
{
}
@@ -604,6 +610,8 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+ unsigned long address, struct page *page);
/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf9..8ae31622deef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1719,6 +1719,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
+ struct page *single_page; /* Locked page to be unmapped */
};
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1766,6 +1767,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -1786,6 +1788,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index def5c62c93b3..8d04e7deedc6 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -91,6 +91,7 @@ enum ttu_flags {
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
+ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible
* and caller guarantees they will
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d9b7c9132c2f..6430a94c6981 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -23,6 +23,16 @@
#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1)
+/* Clear all flags but only keep swp_entry_t related information */
+static inline pte_t pte_swp_clear_flags(pte_t pte)
+{
+ if (pte_swp_soft_dirty(pte))
+ pte = pte_swp_clear_soft_dirty(pte);
+ if (pte_swp_uffd_wp(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ return pte;
+}
+
/*
* Store a type+offset into a swp_entry_t in an arch-independent format
*/
@@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
{
swp_entry_t arch_entry;
- if (pte_swp_soft_dirty(pte))
- pte = pte_swp_clear_soft_dirty(pte);
- if (pte_swp_uffd_wp(pte))
- pte = pte_swp_clear_uffd_wp(pte);
+ pte = pte_swp_clear_flags(pte);
arch_entry = __pte_to_swp_entry(pte);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
}
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b..d2a942086fcb 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 825284baaf46..684a6061a13a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
+ VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
VMCOREINFO_STRUCT_SIZE(page);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 63ed6b25deaa..6d2a0119fc58 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -98,6 +99,7 @@ retry:
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
+ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
+ WRITE_ONCE(huge_zero_pfn, ~0UL);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
@@ -2044,7 +2047,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
count_vm_event(THP_SPLIT_PMD);
if (!vma_is_anonymous(vma)) {
- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
@@ -2053,16 +2056,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
zap_deposited_table(mm, pmd);
if (vma_is_special_huge(vma))
return;
- page = pmd_page(_pmd);
- if (!PageDirty(page) && pmd_dirty(_pmd))
- set_page_dirty(page);
- if (!PageReferenced(page) && pmd_young(_pmd))
- SetPageReferenced(page);
- page_remove_rmap(page, true);
- put_page(page);
+ if (unlikely(is_pmd_migration_entry(old_pmd))) {
+ swp_entry_t entry;
+
+ entry = pmd_to_swp_entry(old_pmd);
+ page = migration_entry_to_page(entry);
+ } else {
+ page = pmd_page(old_pmd);
+ if (!PageDirty(page) && pmd_dirty(old_pmd))
+ set_page_dirty(page);
+ if (!PageReferenced(page) && pmd_young(old_pmd))
+ SetPageReferenced(page);
+ page_remove_rmap(page, true);
+ put_page(page);
+ }
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ }
+
+ if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2338,17 +2350,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
- bool unmap_success;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page))
ttu_flags |= TTU_SPLIT_FREEZE;
- unmap_success = try_to_unmap(page, ttu_flags);
- VM_BUG_ON_PAGE(!unmap_success, page);
+ try_to_unmap(page, ttu_flags);
+
+ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
}
static void remap_page(struct page *page, unsigned int nr)
@@ -2659,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct deferred_split *ds_queue = get_deferred_split_queue(head);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
- int count, mapcount, extra_pins, ret;
+ int extra_pins, ret;
pgoff_t end;
VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2718,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
unmap_page(head);
- VM_BUG_ON_PAGE(compound_mapcount(head), head);
/* block interrupt reentry in xa_lock and spinlock */
local_irq_disable();
@@ -2736,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
/* Prevent deferred_split_scan() touching ->_refcount */
spin_lock(&ds_queue->split_queue_lock);
- count = page_count(head);
- mapcount = total_mapcount(head);
- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+ if (page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
@@ -2758,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
__split_huge_page(page, list, end);
ret = 0;
} else {
- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
- pr_alert("total_mapcount: %u, page_count(): %u\n",
- mapcount, count);
- if (PageTail(page))
- dump_page(head, NULL);
- dump_page(page, "total_mapcount(head) > 0");
- BUG();
- }
spin_unlock(&ds_queue->split_queue_lock);
-fail: if (mapping)
+fail:
+ if (mapping)
xa_unlock(&mapping->i_pages);
local_irq_enable();
remap_page(head, thp_nr_pages(head));
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5560b50876fb..e0a5f9cbbece 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2121,12 +2121,18 @@ out:
* be restored when a newly allocated huge page must be freed. It is
* to be called after calling vma_needs_reservation to determine if a
* reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed. It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
*/
enum vma_resv_mode {
VMA_NEEDS_RESV,
VMA_COMMIT_RESV,
VMA_END_RESV,
VMA_ADD_RESV,
+ VMA_DEL_RESV,
};
static long __vma_reservation_common(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h,
ret = region_del(resv, idx, idx + 1);
}
break;
+ case VMA_DEL_RESV:
+ if (vma->vm_flags & VM_MAYSHARE) {
+ region_abort(resv, idx, idx + 1, 1);
+ ret = region_del(resv, idx, idx + 1);
+ } else {
+ ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+ /* region_add calls of range 1 should never fail. */
+ VM_BUG_ON(ret < 0);
+ }
+ break;
default:
BUG();
}
- if (vma->vm_flags & VM_MAYSHARE)
+ if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
return ret;
/*
* We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h,
return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
}
+static long vma_del_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
/*
- * This routine is called to restore a reservation on error paths. In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed. If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page. When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set. However, free_huge_page can not adjust the
- * reserve map. Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ * HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ * not set. However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count. But, free_huge_page does not have enough context
+ * to adjust the reservation map. This case deals primarily with private
+ * mappings. Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page. Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
*/
-static void restore_reserve_on_error(struct hstate *h,
- struct vm_area_struct *vma, unsigned long address,
- struct page *page)
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+ unsigned long address, struct page *page)
{
- if (unlikely(HPageRestoreReserve(page))) {
- long rc = vma_needs_reservation(h, vma, address);
+ long rc = vma_needs_reservation(h, vma, address);
- if (unlikely(rc < 0)) {
+ if (HPageRestoreReserve(page)) {
+ if (unlikely(rc < 0))
/*
* Rare out of memory condition in reserve map
* manipulation. Clear HPageRestoreReserve so that
@@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h,
* accounting of reserve counts.
*/
ClearHPageRestoreReserve(page);
- } else if (rc) {
- rc = vma_add_reservation(h, vma, address);
- if (unlikely(rc < 0))
+ else if (rc)
+ (void)vma_add_reservation(h, vma, address);
+ else
+ vma_end_reservation(h, vma, address);
+ } else {
+ if (!rc) {
+ /*
+ * This indicates there is an entry in the reserve map
+ * added by alloc_huge_page. We know it was added
+ * before the alloc_huge_page call, otherwise
+ * HPageRestoreReserve would be set on the page.
+ * Remove the entry so that a subsequent allocation
+ * does not consume a reservation.
+ */
+ rc = vma_del_reservation(h, vma, address);
+ if (rc < 0)
+ /*
+ * VERY rare out of memory condition. Since
+ * we can not delete the entry, set
+ * HPageRestoreReserve so that the reserve
+ * count will be incremented when the page
+ * is freed. This reserve will be consumed
+ * on a subsequent allocation.
+ */
+ SetHPageRestoreReserve(page);
+ } else if (rc < 0) {
+ /*
+ * Rare out of memory condition from
+ * vma_needs_reservation call. Memory allocation is
+ * only attempted if a new entry is needed. Therefore,
+ * this implies there is not an entry in the
+ * reserve map.
+ *
+ * For shared mappings, no entry in the map indicates
+ * no reservation. We are done.
+ */
+ if (!(vma->vm_flags & VM_MAYSHARE))
/*
- * See above comment about rare out of
- * memory condition.
+ * For private mappings, no entry indicates
+ * a reservation is present. Since we can
+ * not add an entry, set SetHPageRestoreReserve
+ * on the page so reserve count will be
+ * incremented when freed. This reserve will
+ * be consumed on a subsequent allocation.
*/
- ClearHPageRestoreReserve(page);
+ SetHPageRestoreReserve(page);
} else
- vma_end_reservation(h, vma, address);
+ /*
+ * No reservation present, do nothing
+ */
+ vma_end_reservation(h, vma, address);
}
}
@@ -4037,6 +4108,8 @@ again:
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
if (!pte_same(src_pte_old, entry)) {
+ restore_reserve_on_error(h, vma, addr,
+ new);
put_page(new);
/* dst_entry won't change as in child */
goto again;
@@ -5006,6 +5079,7 @@ out_release_unlock:
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
+ restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
}
@@ -5857,6 +5931,21 @@ unlock:
return ret;
}
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ int ret = 0;
+
+ *hugetlb = false;
+ spin_lock_irq(&hugetlb_lock);
+ if (PageHeadHuge(page)) {
+ *hugetlb = true;
+ if (HPageFreed(page) || HPageMigratable(page))
+ ret = get_page_unless_zero(page);
+ }
+ spin_unlock_irq(&hugetlb_lock);
+ return ret;
+}
+
void putback_active_hugepage(struct page *page)
{
spin_lock_irq(&hugetlb_lock);
diff --git a/mm/internal.h b/mm/internal.h
index 2f1182948aa6..e8fdb531f887 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -384,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
/*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
*/
static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page_to_pgoff(page);
- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page);
+ if (pgoff >= vma->vm_pgoff) {
+ address = vma->vm_start +
+ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address >= vma->vm_end)
+ address = -EFAULT;
+ } else if (PageHead(page) &&
+ pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
+ /* Test above avoids possibility of wrap to 0 on 32-bit */
+ address = vma->vm_start;
+ } else {
+ address = -EFAULT;
+ }
+ return address;
}
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
{
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + thp_size(page) - PAGE_SIZE;
-
- /* page should be within @vma mapping range */
- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
- return max(start, vma->vm_start);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page) + compound_nr(page);
+ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address > vma->vm_end)
+ address = vma->vm_end;
+ return address;
}
static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 85ad98c00fd9..0143d32bc666 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p,
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
+/*
+ * Return true if a page type of a given page is supported by hwpoison
+ * mechanism (while handling could fail), otherwise false. This function
+ * does not return true for hugetlb or device memory pages, so it's assumed
+ * to be called only in the context where we never have such pages.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+ return PageLRU(page) || __PageMovable(page);
+}
+
/**
* __get_hwpoison_page() - Get refcount for memory error handling:
* @page: raw error page (hit by memory error)
@@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p,
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
- if (!PageHuge(head) && PageTransHuge(head)) {
+ /*
+ * This check prevents from calling get_hwpoison_unless_zero()
+ * for any unsupported type of page in order to reduce the risk of
+ * unexpected races caused by taking a page refcount.
+ */
+ if (!HWPoisonHandlable(head))
+ return 0;
+
+ if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
@@ -1017,7 +1042,7 @@ try_again:
ret = -EIO;
}
} else {
- if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+ if (PageHuge(p) || HWPoisonHandlable(p)) {
ret = 1;
} else {
/*
@@ -1527,7 +1552,12 @@ try_again:
return 0;
}
- if (!PageTransTail(p) && !PageLRU(p))
+ /*
+ * __munlock_pagevec may clear a writeback page's LRU flag without
+ * page_lock. We need wait writeback completion for this page or it
+ * may trigger vfs BUG while evict inode.
+ */
+ if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
goto identify_page_state;
/*
diff --git a/mm/memory.c b/mm/memory.c
index f3ffab9b9e39..486f4a2874e7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
+ } else if (details && details->single_page &&
+ PageTransCompound(details->single_page) &&
+ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+ spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+ /*
+ * Take and drop THP pmd lock so that we cannot return
+ * prematurely, while zap_huge_pmd() has cleared *pmd,
+ * but not yet decremented compound_mapcount().
+ */
+ spin_unlock(ptl);
}
+
/*
* Here there can be other concurrent MADV_DONTNEED or
* trans huge page faults running, and if the pmd is
@@ -3237,6 +3248,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
}
/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct zap_details details = { };
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageTail(page));
+
+ details.check_mapping = mapping;
+ details.first_index = page->index;
+ details.last_index = page->index + thp_nr_pages(page) - 1;
+ details.single_page = page;
+
+ i_mmap_lock_write(mapping);
+ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+ unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ i_mmap_unlock_write(mapping);
+}
+
+/**
* unmap_mapping_pages() - Unmap pages from processes.
* @mapping: The address space containing pages to be unmapped.
* @start: Index of first page to be unmapped.
diff --git a/mm/migrate.c b/mm/migrate.c
index b234c3f3acb7..41ff2c9896c4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
goto out;
page = migration_entry_to_page(entry);
+ page = compound_head(page);
/*
* Once page cache replacement of page migration started, page_count
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 2cf01d933f13..e37bd43904af 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -212,23 +212,34 @@ restart:
pvmw->ptl = NULL;
}
} else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+ if ((pvmw->flags & PVMW_SYNC) &&
+ PageTransCompound(pvmw->page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+ }
return false;
}
if (!map_pte(pvmw))
goto next_pte;
while (1) {
+ unsigned long end;
+
if (check_pte(pvmw))
return true;
next_pte:
/* Seek to next pte only makes sense for THP */
if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
return not_found(pvmw);
+ end = vma_address_end(pvmw->page, pvmw->vma);
do {
pvmw->address += PAGE_SIZE;
- if (pvmw->address >= pvmw->vma->vm_end ||
- pvmw->address >=
- __vma_address(pvmw->page, pvmw->vma) +
- thp_size(pvmw->page))
+ if (pvmw->address >= end)
return not_found(pvmw);
/* Did we cross page table boundary? */
if (pvmw->address % PMD_SIZE == 0) {
@@ -266,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
.vma = vma,
.flags = PVMW_SYNC,
};
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + thp_size(page) - PAGE_SIZE;
- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+ pvmw.address = vma_address(page, vma);
+ if (pvmw.address == -EFAULT)
return 0;
- pvmw.address = max(start, vma->vm_start);
if (!page_vma_mapped_walk(&pvmw))
return 0;
page_vma_mapped_walk_done(&pvmw);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index c2210e1cdb51..4e640baf9794 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(!pmd_present(*pmdp));
- /* Below assumes pmd_present() is true */
- VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+ !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
diff --git a/mm/rmap.c b/mm/rmap.c
index 693a610e181d..e05c300048e6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address;
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
@@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping) {
- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
- return -EFAULT;
- } else
+ } else if (!vma->vm_file) {
return -EFAULT;
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
- return address;
+ }
+
+ return vma_address(page, vma);
}
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
0, vma, vma->vm_mm, address,
- min(vma->vm_end, address + page_size(page)));
+ vma_address_end(page, vma));
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
+ range.end = PageKsm(page) ?
+ address + PAGE_SIZE : vma_address_end(page, vma);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- address,
- min(vma->vm_end, address + page_size(page)));
+ address, range.end);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
else
rmap_walk(page, &rwc);
- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}
/**
@@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a4a571428c51..7cab77655f11 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
- if (!name || in_interrupt() || size < sizeof(void *) ||
- size > KMALLOC_MAX_SIZE) {
+ if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
return -EINVAL;
}
diff --git a/mm/slub.c b/mm/slub.c
index 3f96e099817a..61bd40e3eb9a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
+#include <linux/swab.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
@@ -712,15 +713,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
p, p - addr, get_freepointer(s, p));
if (s->flags & SLAB_RED_ZONE)
- print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+ print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
s->red_left_pad);
else if (p > addr + 16)
print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
- print_section(KERN_ERR, "Object ", p,
+ print_section(KERN_ERR, "Object ", p,
min_t(unsigned int, s->object_size, PAGE_SIZE));
if (s->flags & SLAB_RED_ZONE)
- print_section(KERN_ERR, "Redzone ", p + s->object_size,
+ print_section(KERN_ERR, "Redzone ", p + s->object_size,
s->inuse - s->object_size);
off = get_info_end(s);
@@ -732,7 +733,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
- print_section(KERN_ERR, "Padding ", p + off,
+ print_section(KERN_ERR, "Padding ", p + off,
size_from_object(s) - off);
dump_stack();
@@ -909,11 +910,11 @@ static int check_object(struct kmem_cache *s, struct page *page,
u8 *endobject = object + s->object_size;
if (s->flags & SLAB_RED_ZONE) {
- if (!check_bytes_and_report(s, page, object, "Redzone",
+ if (!check_bytes_and_report(s, page, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
- if (!check_bytes_and_report(s, page, object, "Redzone",
+ if (!check_bytes_and_report(s, page, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
} else {
@@ -928,7 +929,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
(!check_bytes_and_report(s, page, p, "Poison", p,
POISON_FREE, s->object_size - 1) ||
- !check_bytes_and_report(s, page, p, "Poison",
+ !check_bytes_and_report(s, page, p, "End Poison",
p + s->object_size - 1, POISON_END, 1)))
return 0;
/*
@@ -3689,7 +3690,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
- unsigned int freepointer_area;
unsigned int order;
/*
@@ -3698,13 +3698,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
- /*
- * This is the area of the object where a freepointer can be
- * safely written. If redzoning adds more to the inuse size, we
- * can't use that portion for writing the freepointer, so
- * s->offset must be limited within this for the general case.
- */
- freepointer_area = size;
#ifdef CONFIG_SLUB_DEBUG
/*
@@ -3730,19 +3723,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
/*
* With that we have determined the number of bytes in actual use
- * by the object. This is the potential offset to the free pointer.
+ * by the object and redzoning.
*/
s->inuse = size;
- if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- s->ctor)) {
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+ ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+ s->ctor) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
* kmem_cache_free.
*
* This is the case if we do RCU, have a constructor or
- * destructor or are poisoning the objects.
+ * destructor, are poisoning the objects, or are
+ * redzoning an object smaller than sizeof(void *).
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
@@ -3751,13 +3746,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
s->offset = size;
size += sizeof(void *);
- } else if (freepointer_area > sizeof(void *)) {
+ } else {
/*
* Store freelist pointer near middle of object to keep
* it away from the edges of the object to avoid small
* sized over/underflows from neighboring allocations.
*/
- s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
+ s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
}
#ifdef CONFIG_SLUB_DEBUG
diff --git a/mm/sparse.c b/mm/sparse.c
index b2ada9dc00cb..55c18aff3e42 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -344,6 +344,15 @@ size_t mem_section_usage_size(void)
return sizeof(struct mem_section_usage) + usemap_size();
}
+static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
+{
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ return __pa_symbol(pgdat);
+#else
+ return __pa(pgdat);
+#endif
+}
+
#ifdef CONFIG_MEMORY_HOTREMOVE
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
@@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
* from the same section as the pgdat where possible to avoid
* this problem.
*/
- goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+ goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
limit = goal + (1UL << PA_SECTION_SHIFT);
nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
@@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid,
}
usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
- pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+ pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
return;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 149e77454e3c..996afa8131c8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free)
static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
{
- return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
+ return pte_same(pte_swp_clear_flags(pte), swp_pte);
}
/*
diff --git a/mm/truncate.c b/mm/truncate.c
index 95af244b112a..234ddd879caa 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
{
- if (page_mapped(page)) {
- unsigned int nr = thp_nr_pages(page);
- unmap_mapping_pages(mapping, page->index, nr, false);
- }
+ if (page_mapped(page))
+ unmap_mapping_page(page);
if (page_has_private(page))
do_invalidatepage(page, 0, thp_size(page));
@@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return -EIO;
- truncate_cleanup_page(mapping, page);
+ truncate_cleanup_page(page);
delete_from_page_cache(page);
return 0;
}
@@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
index = indices[pagevec_count(&pvec) - 1] + 1;
truncate_exceptional_pvec_entries(mapping, &pvec, indices);
for (i = 0; i < pagevec_count(&pvec); i++)
- truncate_cleanup_page(mapping, pvec.pages[i]);
+ truncate_cleanup_page(pvec.pages[i]);
delete_from_page_cache_batch(mapping, &pvec);
for (i = 0; i < pagevec_count(&pvec); i++)
unlock_page(pvec.pages[i]);
@@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
+ if (!did_range_unmap && page_mapped(page)) {
+ /*
+ * If page is mapped, before taking its lock,
+ * zap the rest of the file in one hit.
+ */
+ unmap_mapping_pages(mapping, index,
+ (1 + end - index), false);
+ did_range_unmap = 1;
+ }
+
lock_page(page);
WARN_ON(page_to_index(page) != index);
if (page->mapping != mapping) {
@@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- if (!did_range_unmap) {
- /*
- * Zap the rest of the file in one hit.
- */
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
- did_range_unmap = 1;
- } else {
- /*
- * Just zap this page
- */
- unmap_mapping_pages(mapping, index,
- 1, false);
- }
- }
+
+ if (page_mapped(page))
+ unmap_mapping_page(page);
BUG_ON(page_mapped(page));
+
ret2 = do_launder_page(mapping, page);
if (ret2 == 0) {
if (!invalidate_complete_page2(mapping, page))
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5c70596dd1b9..a2b732cf96ea 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -82,7 +82,7 @@ int kvm_check_cap(long cap)
kvm_fd = open_kvm_dev_path_or_exit();
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
" rc: %i errno: %i", ret, errno);
close(kvm_fd);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 6ad6c8276b2e..af1031fed97f 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -166,75 +166,75 @@ size_t get_def_hugetlb_pagesz(void)
return 0;
}
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
{
- static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
- static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
-
static const struct vm_mem_backing_src_alias aliases[] = {
[VM_MEM_SRC_ANONYMOUS] = {
.name = "anonymous",
- .flag = anon_flags,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_THP] = {
.name = "anonymous_thp",
- .flag = anon_flags,
+ .flag = ANON_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
.name = "anonymous_hugetlb",
- .flag = anon_huge_flags,
+ .flag = ANON_HUGE_FLAGS,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
.name = "anonymous_hugetlb_16kb",
- .flag = anon_huge_flags | MAP_HUGE_16KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
.name = "anonymous_hugetlb_64kb",
- .flag = anon_huge_flags | MAP_HUGE_64KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
.name = "anonymous_hugetlb_512kb",
- .flag = anon_huge_flags | MAP_HUGE_512KB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
.name = "anonymous_hugetlb_1mb",
- .flag = anon_huge_flags | MAP_HUGE_1MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
.name = "anonymous_hugetlb_2mb",
- .flag = anon_huge_flags | MAP_HUGE_2MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
.name = "anonymous_hugetlb_8mb",
- .flag = anon_huge_flags | MAP_HUGE_8MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
.name = "anonymous_hugetlb_16mb",
- .flag = anon_huge_flags | MAP_HUGE_16MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
.name = "anonymous_hugetlb_32mb",
- .flag = anon_huge_flags | MAP_HUGE_32MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
.name = "anonymous_hugetlb_256mb",
- .flag = anon_huge_flags | MAP_HUGE_256MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
.name = "anonymous_hugetlb_512mb",
- .flag = anon_huge_flags | MAP_HUGE_512MB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
.name = "anonymous_hugetlb_1gb",
- .flag = anon_huge_flags | MAP_HUGE_1GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
.name = "anonymous_hugetlb_2gb",
- .flag = anon_huge_flags | MAP_HUGE_2GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
},
[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
.name = "anonymous_hugetlb_16gb",
- .flag = anon_huge_flags | MAP_HUGE_16GB,
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
},
[VM_MEM_SRC_SHMEM] = {
.name = "shmem",