diff options
50 files changed, 593 insertions, 216 deletions
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst index 03f294a638bd..d3028554b1e9 100644 --- a/Documentation/vm/slub.rst +++ b/Documentation/vm/slub.rst @@ -181,7 +181,7 @@ SLUB Debug output Here is a sample of slub debug output:: ==================================================================== - BUG kmalloc-8: Redzone overwritten + BUG kmalloc-8: Right Redzone overwritten -------------------------------------------------------------------- INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc @@ -189,10 +189,10 @@ Here is a sample of slub debug output:: INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58 INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554 - Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ - Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005 - Redzone 0xc90f6d28: 00 cc cc cc . - Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ + Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ + Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005 + Redzone (0xc90f6d28): 00 cc cc cc . + Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [<c010523d>] dump_trace+0x63/0x1eb [<c01053df>] show_trace_log_lvl+0x1a/0x2f @@ -929,11 +929,14 @@ CC_FLAGS_LTO += -fvisibility=hidden # Limit inlining across translation units to reduce binary size KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 -# Check for frame size exceeding threshold during prolog/epilog insertion. +# Check for frame size exceeding threshold during prolog/epilog insertion +# when using lld < 13.0.0. ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN) endif endif +endif ifdef CONFIG_LTO KBUILD_CFLAGS += -fno-lto $(CC_FLAGS_LTO) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9a48f138832d..b4da665bb892 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) entry->ecx = F(RDPID); ++array->nent; + break; default: break; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6d72d8f43310..17fa4ab1b834 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, if (!apic_x2apic_mode(apic)) valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); + if (alignment + len > 4) + return 1; + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0144c40d09c7..8d5876dfc6b7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) context->inject_page_fault = kvm_inject_page_fault; } +static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false); + + /* + * Nested MMUs are used only for walking L2's gva->gpa, they never have + * shadow pages of their own and so "direct" has no meaning. Set it + * to "true" to try to detect bogus usage of the nested MMU. + */ + role.base.direct = true; + + if (!is_paging(vcpu)) + role.base.level = 0; + else if (is_long_mode(vcpu)) + role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL : + PT64_ROOT_4LEVEL; + else if (is_pae(vcpu)) + role.base.level = PT32E_ROOT_LEVEL; + else + role.base.level = PT32_ROOT_LEVEL; + + return role; +} + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); + union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_role.as_u64 == g_context->mmu_role.as_u64) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 0e62e6a2438c..5e7e920113f3 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, return &avic_physical_id_table[index]; } -/** +/* * Note: * AVIC hardware walks the nested page table to check permissions, * but does not use the SPA address specified in the leaf page @@ -764,7 +764,7 @@ out: return ret; } -/** +/* * Note: * The HW cannot support posting multicast/broadcast * interrupts to a vCPU. So, we still use legacy interrupt @@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -/** +/* * This function is called during VCPU halt/unhalt. */ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e0ce5da97fc2..8d36f0c73071 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev) sev->misc_cg = NULL; } -static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission decommission; + + if (!handle) + return; + + decommission.handle = handle; + sev_guest_decommission(&decommission, NULL); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate deactivate; if (!handle) @@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_guest_deactivate(&deactivate, NULL); up_read(&sev_deactivate_lock); - /* decommission handle */ - decommission.handle = handle; - sev_guest_decommission(&decommission, NULL); + sev_decommission(handle); } static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start.handle, error); - if (ret) + if (ret) { + sev_decommission(start.handle); goto e_free_session; + } /* return handle to userspace */ params.handle = start.handle; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 50b42d7a8a11..c2a779b688e6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) switch (kvm_get_apic_mode(vcpu)) { case LAPIC_MODE_INVALID: WARN_ONCE(true, "Invalid local APIC state"); + break; case LAPIC_MODE_DISABLED: break; case LAPIC_MODE_XAPIC: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d3955a6a763..e0f4a46649d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - emul_to_vcpu(ctxt)->arch.hflags = emul_flags; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + + vcpu->arch.hflags = emul_flags; + kvm_mmu_reset_context(vcpu); } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, @@ -8258,6 +8261,7 @@ void kvm_arch_exit(void) kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); free_percpu(user_return_msrs); + kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); #ifdef CONFIG_KVM_XEN static_key_deferred_flush(&kvm_xen_enabled); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6ab9d9a488a6..39b5b46e880f 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -59,6 +59,7 @@ config DMA_OF #devices config ALTERA_MSGDMA tristate "Altera / Intel mSGDMA Engine" + depends on HAS_IOMEM select DMA_ENGINE help Enable support for Altera / Intel mSGDMA controller. @@ -701,6 +702,7 @@ config XILINX_ZYNQMP_DMA config XILINX_ZYNQMP_DPDMA tristate "Xilinx DPDMA Engine" + depends on HAS_IOMEM && OF select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 4ec909e0b810..4ae057922ef1 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev) } if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) { + err = -EINVAL; dev_err(dev, "DPDMAI major version mismatch\n" "Found %u.%u, supported version is %u.%u\n", priv->dpdmai_attr.version.major, @@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev) } if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) { + err = -EINVAL; dev_err(dev, "DPDMAI minor version mismatch\n" "Found %u.%u, supported version is %u.%u\n", priv->dpdmai_attr.version.major, @@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv) ppriv->store = dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev); if (!ppriv->store) { + err = -ENOMEM; dev_err(dev, "dpaa2_io_store_create() failed\n"); goto err_store; } diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 302cba5ff779..d4419bf1fede 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) pasid = iommu_sva_get_pasid(sva); if (pasid == IOMMU_PASID_INVALID) { iommu_sva_unbind_device(sva); + rc = -EINVAL; goto failed; } diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 776fd44aff5f..442d55c11a5f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) return rc; } +static void idxd_cleanup_interrupts(struct idxd_device *idxd) +{ + struct pci_dev *pdev = idxd->pdev; + struct idxd_irq_entry *irq_entry; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(pdev); + if (msixcnt <= 0) + return; + + irq_entry = &idxd->irq_entries[0]; + free_irq(irq_entry->vector, irq_entry); + + for (i = 1; i < msixcnt; i++) { + + irq_entry = &idxd->irq_entries[i]; + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) + idxd_device_release_int_handle(idxd, idxd->int_handles[i], + IDXD_IRQ_MSIX); + free_irq(irq_entry->vector, irq_entry); + } + + idxd_mask_error_interrupts(idxd); + pci_free_irq_vectors(pdev); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -242,6 +268,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine->idxd = idxd; device_initialize(&engine->conf_dev); engine->conf_dev.parent = &idxd->conf_dev; + engine->conf_dev.bus = &dsa_bus_type; engine->conf_dev.type = &idxd_engine_device_type; rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { @@ -303,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd) return rc; } +static void idxd_cleanup_internals(struct idxd_device *idxd) +{ + int i; + + for (i = 0; i < idxd->max_groups; i++) + put_device(&idxd->groups[i]->conf_dev); + for (i = 0; i < idxd->max_engines; i++) + put_device(&idxd->engines[i]->conf_dev); + for (i = 0; i < idxd->max_wqs; i++) + put_device(&idxd->wqs[i]->conf_dev); + destroy_workqueue(idxd->wq); +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -531,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "Loading RO device config\n"); rc = idxd_device_load_config(idxd); if (rc < 0) - goto err; + goto err_config; } rc = idxd_setup_interrupts(idxd); if (rc) - goto err; + goto err_config; dev_dbg(dev, "IDXD interrupt setup complete.\n"); @@ -549,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); return 0; + err_config: + idxd_cleanup_internals(idxd); err: if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); @@ -556,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd) return rc; } +static void idxd_cleanup(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + + perfmon_pmu_remove(idxd); + idxd_cleanup_interrupts(idxd); + idxd_cleanup_internals(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); +} + static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -608,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = idxd_register_devices(idxd); if (rc) { dev_err(dev, "IDXD sysfs setup failed\n"); - goto err; + goto err_dev_register; } idxd->state = IDXD_DEV_CONF_READY; @@ -618,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; + err_dev_register: + idxd_cleanup(idxd); err: pci_iounmap(pdev, idxd->reg_base); err_iomap: @@ -787,6 +843,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { + idxd_unregister_driver(); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); idxd_unregister_bus_type(); diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index 0d5c42f7bfa4..97d9a6f04f2a 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -230,7 +230,7 @@ out: } /** - * ipu_irq_map() - map an IPU interrupt source to an IRQ number + * ipu_irq_unmap() - unmap an IPU interrupt source * @source: interrupt source bit position (see ipu_irq_map()) * @return: 0 or negative error code */ diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 27c07350971d..375e7e647df6 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg) static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) { - struct dma_chan *chan = vd->tx.chan; - struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); - - kfree(c->desc); + kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); } static void mtk_uart_apdma_start_tx(struct mtk_chan *c) @@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c) static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) { - struct mtk_uart_apdma_desc *d = c->desc; - mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); - - list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); } static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) @@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) c->rx_status = d->avail_len - cnt; mtk_uart_apdma_write(c, VFF_RPT, wg); +} - list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); +static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) +{ + struct mtk_uart_apdma_desc *d = c->desc; + + if (d) { + list_del(&d->vd.node); + vchan_cookie_complete(&d->vd); + c->desc = NULL; + } } static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) @@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) mtk_uart_apdma_rx_handler(c); else if (c->dir == DMA_MEM_TO_DEV) mtk_uart_apdma_tx_handler(c); + mtk_uart_apdma_chan_complete_handler(c); spin_unlock_irqrestore(&c->vc.lock, flags); return IRQ_HANDLED; @@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg return NULL; /* Now allocate and setup the descriptor */ - d = kzalloc(sizeof(*d), GFP_ATOMIC); + d = kzalloc(sizeof(*d), GFP_NOWAIT); if (!d) return NULL; @@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&c->vc.lock, flags); - if (vchan_issue_pending(&c->vc)) { + if (vchan_issue_pending(&c->vc) && !c->desc) { vd = vchan_next_desc(&c->vc); c->desc = to_mtk_uart_apdma_desc(&vd->tx); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index fd8d2bc3be9f..110de8a60058 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( for (i = 0; i < len / period_len; i++) { desc = pl330_get_desc(pch); if (!desc) { + unsigned long iflags; + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", __func__, __LINE__); if (!first) return NULL; - spin_lock_irqsave(&pl330->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, iflags); while (!list_empty(&first->node)) { desc = list_entry(first->node.next, @@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( list_move_tail(&first->node, &pl330->desc_pool); - spin_unlock_irqrestore(&pl330->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, iflags); return NULL; } diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig index 365f94eb3b08..3f926a653bd8 100644 --- a/drivers/dma/qcom/Kconfig +++ b/drivers/dma/qcom/Kconfig @@ -33,6 +33,7 @@ config QCOM_GPI_DMA config QCOM_HIDMA_MGMT tristate "Qualcomm Technologies HIDMA Management support" + depends on HAS_IOMEM select DMA_ENGINE help Enable support for the Qualcomm Technologies HIDMA Management. diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig index f8ffa02e279f..ba46a0a15a93 100644 --- a/drivers/dma/sf-pdma/Kconfig +++ b/drivers/dma/sf-pdma/Kconfig @@ -1,5 +1,6 @@ config SF_PDMA tristate "Sifive PDMA controller driver" + depends on HAS_IOMEM select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index d530c1bf11d9..6885b3dcd7a9 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev) /* Enable runtime PM and initialize the device. */ pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); return ret; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 265d7c07b348..e1827393143f 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -3675,6 +3675,9 @@ static int __init d40_probe(struct platform_device *pdev) kfree(base->lcla_pool.base_unaligned); + if (base->lcpa_base) + iounmap(base->lcpa_base); + if (base->phy_lcpa) release_mem_region(base->phy_lcpa, base->lcpa_size); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 36ba8b43e78d..18cbd1e43c2e 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c) return -ENOMEM; } - ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret; @@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev) u32 ccr, id; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 70b29bd079c9..6c709803203a 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -113,6 +113,7 @@ #define XILINX_DPDMA_CH_VDO 0x020 #define XILINX_DPDMA_CH_PYLD_SZ 0x024 #define XILINX_DPDMA_CH_DESC_ID 0x028 +#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0) /* DPDMA descriptor fields */ #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 @@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) * will be used, but it should be enough. */ list_for_each_entry(sw_desc, &desc->descriptors, node) - sw_desc->hw.desc_id = desc->vdesc.tx.cookie; + sw_desc->hw.desc_id = desc->vdesc.tx.cookie + & XILINX_DPDMA_CH_DESC_ID_MASK; sw_desc = list_first_entry(&desc->descriptors, struct xilinx_dpdma_sw_desc, node); @@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) if (!chan->running || !pending) goto out; - desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID); + desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) + & XILINX_DPDMA_CH_DESC_ID_MASK; /* If the retrigger raced with vsync, retry at the next frame. */ sw_desc = list_first_entry(&pending->descriptors, @@ -1459,7 +1462,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev) */ static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev) { - dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL); dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL); } @@ -1596,6 +1599,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan); } +static void dpdma_hw_init(struct xilinx_dpdma_device *xdev) +{ + unsigned int i; + void __iomem *reg; + + /* Disable all interrupts */ + xilinx_dpdma_disable_irq(xdev); + + /* Stop all channels */ + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) { + reg = xdev->reg + XILINX_DPDMA_CH_BASE + + XILINX_DPDMA_CH_OFFSET * i; + dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE); + } + + /* Clear the interrupt status registers */ + dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL); +} + static int xilinx_dpdma_probe(struct platform_device *pdev) { struct xilinx_dpdma_device *xdev; @@ -1622,6 +1645,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev) if (IS_ERR(xdev->reg)) return PTR_ERR(xdev->reg); + dpdma_hw_init(xdev); + xdev->irq = platform_get_irq(pdev, 0); if (xdev->irq < 0) { dev_err(xdev->dev, "failed to get platform irq\n"); diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index d8419565b92c..5fecf5aa6e85 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) struct zynqmp_dma_desc_sw *desc; int i, ret; - ret = pm_runtime_get_sync(chan->dev); + ret = pm_runtime_resume_and_get(chan->dev); if (ret < 0) return ret; diff --git a/fs/afs/main.c b/fs/afs/main.c index b2975256dadb..179004b15566 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -203,8 +203,8 @@ static int __init afs_init(void) goto error_fs; afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); - if (IS_ERR(afs_proc_symlink)) { - ret = PTR_ERR(afs_proc_symlink); + if (!afs_proc_symlink) { + ret = -ENOMEM; goto error_proc; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 55efd3dd04f6..30dee68458c7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, __SetPageUptodate(page); error = huge_add_to_page_cache(page, mapping, index); if (unlikely(error)) { + restore_reserve_on_error(h, &pseudo_vma, addr, page); put_page(page); mutex_unlock(&hugetlb_fault_mutex_table[hash]); goto out; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index be5b6d2c01e7..64864fb40b40 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -471,7 +471,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, info_type, fanotify_info_name(info), info->name_len, buf, count); if (ret < 0) - return ret; + goto out_close_fd; buf += ret; count -= ret; @@ -519,7 +519,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_object_fh(event), info_type, dot, dot_len, buf, count); if (ret < 0) - return ret; + goto out_close_fd; buf += ret; count -= ret; diff --git a/fs/proc/base.c b/fs/proc/base.c index 7118ebe38fa6..9cbd915025ad 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2676,7 +2676,9 @@ out: #ifdef CONFIG_SECURITY static int proc_pid_attr_open(struct inode *inode, struct file *file) { - return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); + file->private_data = NULL; + __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); + return 0; } static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9626fda5efce..2a8ebe6c222e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; +extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_page(struct page *page) { @@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(struct page *page) static inline bool is_huge_zero_pmd(pmd_t pmd) { - return is_huge_zero_page(pmd_page(pmd)); + return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); } static inline bool is_huge_zero_pud(pud_t pud) @@ -440,6 +441,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return false; +} + static inline bool is_huge_zero_pud(pud_t pud) { return false; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b92f25ccef58..6504346a1947 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool isolate_huge_page(struct page *page, struct list_head *list); +int get_hwpoison_huge_page(struct page *page, bool *hugetlb); void putback_active_hugepage(struct page *page); void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); @@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list) return false; } +static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +{ + return 0; +} + static inline void putback_active_hugepage(struct page *page) { } @@ -604,6 +610,8 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); +void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, + unsigned long address, struct page *page); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h); diff --git a/include/linux/mm.h b/include/linux/mm.h index c274f75efcf9..8ae31622deef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1719,6 +1719,7 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + struct page *single_page; /* Locked page to be unmapped */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@ -1766,6 +1767,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); +void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1786,6 +1788,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } +static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index def5c62c93b3..8d04e7deedc6 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,6 +91,7 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ + TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will diff --git a/include/linux/swapops.h b/include/linux/swapops.h index d9b7c9132c2f..6430a94c6981 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -23,6 +23,16 @@ #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) +/* Clear all flags but only keep swp_entry_t related information */ +static inline pte_t pte_swp_clear_flags(pte_t pte) +{ + if (pte_swp_soft_dirty(pte)) + pte = pte_swp_clear_soft_dirty(pte); + if (pte_swp_uffd_wp(pte)) + pte = pte_swp_clear_uffd_wp(pte); + return pte; +} + /* * Store a type+offset into a swp_entry_t in an arch-independent format */ @@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) { swp_entry_t arch_entry; - if (pte_swp_soft_dirty(pte)) - pte = pte_swp_clear_soft_dirty(pte); - if (pte_swp_uffd_wp(pte)) - pte = pte_swp_clear_uffd_wp(pte); + pte = pte_swp_clear_flags(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 6de5a7fc066b..d2a942086fcb 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 825284baaf46..684a6061a13a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); VMCOREINFO_STRUCT_SIZE(mem_section); VMCOREINFO_OFFSET(mem_section, section_mem_map); + VMCOREINFO_NUMBER(SECTION_SIZE_BITS); VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); #endif VMCOREINFO_STRUCT_SIZE(page); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 63ed6b25deaa..6d2a0119fc58 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker; static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; +unsigned long huge_zero_pfn __read_mostly = ~0UL; bool transparent_hugepage_enabled(struct vm_area_struct *vma) { @@ -98,6 +99,7 @@ retry: __free_pages(zero_page, compound_order(zero_page)); goto retry; } + WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); /* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); @@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); + WRITE_ONCE(huge_zero_pfn, ~0UL); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -2044,7 +2047,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, count_vm_event(THP_SPLIT_PMD); if (!vma_is_anonymous(vma)) { - _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* * We are going to unmap this huge page. So * just go ahead and zap it @@ -2053,16 +2056,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (vma_is_special_huge(vma)) return; - page = pmd_page(_pmd); - if (!PageDirty(page) && pmd_dirty(_pmd)) - set_page_dirty(page); - if (!PageReferenced(page) && pmd_young(_pmd)) - SetPageReferenced(page); - page_remove_rmap(page, true); - put_page(page); + if (unlikely(is_pmd_migration_entry(old_pmd))) { + swp_entry_t entry; + + entry = pmd_to_swp_entry(old_pmd); + page = migration_entry_to_page(entry); + } else { + page = pmd_page(old_pmd); + if (!PageDirty(page) && pmd_dirty(old_pmd)) + set_page_dirty(page); + if (!PageReferenced(page) && pmd_young(old_pmd)) + SetPageReferenced(page); + page_remove_rmap(page, true); + put_page(page); + } add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + } + + if (is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2338,17 +2350,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; - bool unmap_success; VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) ttu_flags |= TTU_SPLIT_FREEZE; - unmap_success = try_to_unmap(page, ttu_flags); - VM_BUG_ON_PAGE(!unmap_success, page); + try_to_unmap(page, ttu_flags); + + VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); } static void remap_page(struct page *page, unsigned int nr) @@ -2659,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) struct deferred_split *ds_queue = get_deferred_split_queue(head); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; - int count, mapcount, extra_pins, ret; + int extra_pins, ret; pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(head), head); @@ -2718,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } unmap_page(head); - VM_BUG_ON_PAGE(compound_mapcount(head), head); /* block interrupt reentry in xa_lock and spinlock */ local_irq_disable(); @@ -2736,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) /* Prevent deferred_split_scan() touching ->_refcount */ spin_lock(&ds_queue->split_queue_lock); - count = page_count(head); - mapcount = total_mapcount(head); - if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { + if (page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; list_del(page_deferred_list(head)); @@ -2758,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __split_huge_page(page, list, end); ret = 0; } else { - if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { - pr_alert("total_mapcount: %u, page_count(): %u\n", - mapcount, count); - if (PageTail(page)) - dump_page(head, NULL); - dump_page(page, "total_mapcount(head) > 0"); - BUG(); - } spin_unlock(&ds_queue->split_queue_lock); -fail: if (mapping) +fail: + if (mapping) xa_unlock(&mapping->i_pages); local_irq_enable(); remap_page(head, thp_nr_pages(head)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5560b50876fb..e0a5f9cbbece 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2121,12 +2121,18 @@ out: * be restored when a newly allocated huge page must be freed. It is * to be called after calling vma_needs_reservation to determine if a * reservation exists. + * + * vma_del_reservation is used in error paths where an entry in the reserve + * map was created during huge page allocation and must be removed. It is to + * be called after calling vma_needs_reservation to determine if a reservation + * exists. */ enum vma_resv_mode { VMA_NEEDS_RESV, VMA_COMMIT_RESV, VMA_END_RESV, VMA_ADD_RESV, + VMA_DEL_RESV, }; static long __vma_reservation_common(struct hstate *h, struct vm_area_struct *vma, unsigned long addr, @@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h, ret = region_del(resv, idx, idx + 1); } break; + case VMA_DEL_RESV: + if (vma->vm_flags & VM_MAYSHARE) { + region_abort(resv, idx, idx + 1, 1); + ret = region_del(resv, idx, idx + 1); + } else { + ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); + /* region_add calls of range 1 should never fail. */ + VM_BUG_ON(ret < 0); + } + break; default: BUG(); } - if (vma->vm_flags & VM_MAYSHARE) + if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV) return ret; /* * We know private mapping must have HPAGE_RESV_OWNER set. @@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h, return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); } +static long vma_del_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +{ + return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV); +} + /* - * This routine is called to restore a reservation on error paths. In the - * specific error paths, a huge page was allocated (via alloc_huge_page) - * and is about to be freed. If a reservation for the page existed, - * alloc_huge_page would have consumed the reservation and set - * HPageRestoreReserve in the newly allocated page. When the page is freed - * via free_huge_page, the global reservation count will be incremented if - * HPageRestoreReserve is set. However, free_huge_page can not adjust the - * reserve map. Adjust the reserve map here to be consistent with global - * reserve count adjustments to be made by free_huge_page. + * This routine is called to restore reservation information on error paths. + * It should ONLY be called for pages allocated via alloc_huge_page(), and + * the hugetlb mutex should remain held when calling this routine. + * + * It handles two specific cases: + * 1) A reservation was in place and the page consumed the reservation. + * HPageRestoreReserve is set in the page. + * 2) No reservation was in place for the page, so HPageRestoreReserve is + * not set. However, alloc_huge_page always updates the reserve map. + * + * In case 1, free_huge_page later in the error path will increment the + * global reserve count. But, free_huge_page does not have enough context + * to adjust the reservation map. This case deals primarily with private + * mappings. Adjust the reserve map here to be consistent with global + * reserve count adjustments to be made by free_huge_page. Make sure the + * reserve map indicates there is a reservation present. + * + * In case 2, simply undo reserve map modifications done by alloc_huge_page. */ -static void restore_reserve_on_error(struct hstate *h, - struct vm_area_struct *vma, unsigned long address, - struct page *page) +void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, + unsigned long address, struct page *page) { - if (unlikely(HPageRestoreReserve(page))) { - long rc = vma_needs_reservation(h, vma, address); + long rc = vma_needs_reservation(h, vma, address); - if (unlikely(rc < 0)) { + if (HPageRestoreReserve(page)) { + if (unlikely(rc < 0)) /* * Rare out of memory condition in reserve map * manipulation. Clear HPageRestoreReserve so that @@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h, * accounting of reserve counts. */ ClearHPageRestoreReserve(page); - } else if (rc) { - rc = vma_add_reservation(h, vma, address); - if (unlikely(rc < 0)) + else if (rc) + (void)vma_add_reservation(h, vma, address); + else + vma_end_reservation(h, vma, address); + } else { + if (!rc) { + /* + * This indicates there is an entry in the reserve map + * added by alloc_huge_page. We know it was added + * before the alloc_huge_page call, otherwise + * HPageRestoreReserve would be set on the page. + * Remove the entry so that a subsequent allocation + * does not consume a reservation. + */ + rc = vma_del_reservation(h, vma, address); + if (rc < 0) + /* + * VERY rare out of memory condition. Since + * we can not delete the entry, set + * HPageRestoreReserve so that the reserve + * count will be incremented when the page + * is freed. This reserve will be consumed + * on a subsequent allocation. + */ + SetHPageRestoreReserve(page); + } else if (rc < 0) { + /* + * Rare out of memory condition from + * vma_needs_reservation call. Memory allocation is + * only attempted if a new entry is needed. Therefore, + * this implies there is not an entry in the + * reserve map. + * + * For shared mappings, no entry in the map indicates + * no reservation. We are done. + */ + if (!(vma->vm_flags & VM_MAYSHARE)) /* - * See above comment about rare out of - * memory condition. + * For private mappings, no entry indicates + * a reservation is present. Since we can + * not add an entry, set SetHPageRestoreReserve + * on the page so reserve count will be + * incremented when freed. This reserve will + * be consumed on a subsequent allocation. */ - ClearHPageRestoreReserve(page); + SetHPageRestoreReserve(page); } else - vma_end_reservation(h, vma, address); + /* + * No reservation present, do nothing + */ + vma_end_reservation(h, vma, address); } } @@ -4037,6 +4108,8 @@ again: spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); if (!pte_same(src_pte_old, entry)) { + restore_reserve_on_error(h, vma, addr, + new); put_page(new); /* dst_entry won't change as in child */ goto again; @@ -5006,6 +5079,7 @@ out_release_unlock: if (vm_shared || is_continue) unlock_page(page); out_release_nounlock: + restore_reserve_on_error(h, dst_vma, dst_addr, page); put_page(page); goto out; } @@ -5857,6 +5931,21 @@ unlock: return ret; } +int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +{ + int ret = 0; + + *hugetlb = false; + spin_lock_irq(&hugetlb_lock); + if (PageHeadHuge(page)) { + *hugetlb = true; + if (HPageFreed(page) || HPageMigratable(page)) + ret = get_page_unless_zero(page); + } + spin_unlock_irq(&hugetlb_lock); + return ret; +} + void putback_active_hugepage(struct page *page) { spin_lock_irq(&hugetlb_lock); diff --git a/mm/internal.h b/mm/internal.h index 2f1182948aa6..e8fdb531f887 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -384,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* - * At what user virtual address is page expected in @vma? + * At what user virtual address is page expected in vma? + * Returns -EFAULT if all of the page is outside the range of vma. + * If page is a compound head, the entire compound page is considered. */ static inline unsigned long -__vma_address(struct page *page, struct vm_area_struct *vma) +vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page_to_pgoff(page); - return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page); + if (pgoff >= vma->vm_pgoff) { + address = vma->vm_start + + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address >= vma->vm_end) + address = -EFAULT; + } else if (PageHead(page) && + pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { + /* Test above avoids possibility of wrap to 0 on 32-bit */ + address = vma->vm_start; + } else { + address = -EFAULT; + } + return address; } +/* + * Then at what user virtual address will none of the page be found in vma? + * Assumes that vma_address() already returned a good starting address. + * If page is a compound head, the entire compound page is considered. + */ static inline unsigned long -vma_address(struct page *page, struct vm_area_struct *vma) +vma_address_end(struct page *page, struct vm_area_struct *vma) { - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - - /* page should be within @vma mapping range */ - VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); - - return max(start, vma->vm_start); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page) + compound_nr(page); + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address > vma->vm_end) + address = vma->vm_end; + return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 85ad98c00fd9..0143d32bc666 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p, return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; } +/* + * Return true if a page type of a given page is supported by hwpoison + * mechanism (while handling could fail), otherwise false. This function + * does not return true for hugetlb or device memory pages, so it's assumed + * to be called only in the context where we never have such pages. + */ +static inline bool HWPoisonHandlable(struct page *page) +{ + return PageLRU(page) || __PageMovable(page); +} + /** * __get_hwpoison_page() - Get refcount for memory error handling: * @page: raw error page (hit by memory error) @@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p, static int __get_hwpoison_page(struct page *page) { struct page *head = compound_head(page); + int ret = 0; + bool hugetlb = false; + + ret = get_hwpoison_huge_page(head, &hugetlb); + if (hugetlb) + return ret; - if (!PageHuge(head) && PageTransHuge(head)) { + /* + * This check prevents from calling get_hwpoison_unless_zero() + * for any unsupported type of page in order to reduce the risk of + * unexpected races caused by taking a page refcount. + */ + if (!HWPoisonHandlable(head)) + return 0; + + if (PageTransHuge(head)) { /* * Non anonymous thp exists only in allocation/free time. We * can't handle such a case correctly, so let's give it up. @@ -1017,7 +1042,7 @@ try_again: ret = -EIO; } } else { - if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) { + if (PageHuge(p) || HWPoisonHandlable(p)) { ret = 1; } else { /* @@ -1527,7 +1552,12 @@ try_again: return 0; } - if (!PageTransTail(p) && !PageLRU(p)) + /* + * __munlock_pagevec may clear a writeback page's LRU flag without + * page_lock. We need wait writeback completion for this page or it + * may trigger vfs BUG while evict inode. + */ + if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p)) goto identify_page_state; /* diff --git a/mm/memory.c b/mm/memory.c index f3ffab9b9e39..486f4a2874e7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ + } else if (details && details->single_page && + PageTransCompound(details->single_page) && + next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { + spinlock_t *ptl = pmd_lock(tlb->mm, pmd); + /* + * Take and drop THP pmd lock so that we cannot return + * prematurely, while zap_huge_pmd() has cleared *pmd, + * but not yet decremented compound_mapcount(). + */ + spin_unlock(ptl); } + /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is @@ -3237,6 +3248,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } /** + * unmap_mapping_page() - Unmap single page from processes. + * @page: The locked page to be unmapped. + * + * Unmap this page from any userspace process which still has it mmaped. + * Typically, for efficiency, the range of nearby pages has already been + * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once + * truncation or invalidation holds the lock on a page, it may find that + * the page has been remapped again: and then uses unmap_mapping_page() + * to unmap it finally. + */ +void unmap_mapping_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct zap_details details = { }; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageTail(page)); + + details.check_mapping = mapping; + details.first_index = page->index; + details.last_index = page->index + thp_nr_pages(page) - 1; + details.single_page = page; + + i_mmap_lock_write(mapping); + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) + unmap_mapping_range_tree(&mapping->i_mmap, &details); + i_mmap_unlock_write(mapping); +} + +/** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. * @start: Index of first page to be unmapped. diff --git a/mm/migrate.c b/mm/migrate.c index b234c3f3acb7..41ff2c9896c4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, goto out; page = migration_entry_to_page(entry); + page = compound_head(page); /* * Once page cache replacement of page migration started, page_count diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 2cf01d933f13..e37bd43904af 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -212,23 +212,34 @@ restart: pvmw->ptl = NULL; } } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). + */ + if ((pvmw->flags & PVMW_SYNC) && + PageTransCompound(pvmw->page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); + } return false; } if (!map_pte(pvmw)) goto next_pte; while (1) { + unsigned long end; + if (check_pte(pvmw)) return true; next_pte: /* Seek to next pte only makes sense for THP */ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) return not_found(pvmw); + end = vma_address_end(pvmw->page, pvmw->vma); do { pvmw->address += PAGE_SIZE; - if (pvmw->address >= pvmw->vma->vm_end || - pvmw->address >= - __vma_address(pvmw->page, pvmw->vma) + - thp_size(pvmw->page)) + if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { @@ -266,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) .vma = vma, .flags = PVMW_SYNC, }; - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - if (unlikely(end < vma->vm_start || start >= vma->vm_end)) + pvmw.address = vma_address(page, vma); + if (pvmw.address == -EFAULT) return 0; - pvmw.address = max(start, vma->vm_start); if (!page_vma_mapped_walk(&pvmw)) return 0; page_vma_mapped_walk_done(&pvmw); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index c2210e1cdb51..4e640baf9794 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(!pmd_present(*pmdp)); - /* Below assumes pmd_present() is true */ - VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; diff --git a/mm/rmap.c b/mm/rmap.c index 693a610e181d..e05c300048e6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { - unsigned long address; if (PageAnon(page)) { struct anon_vma *page__anon_vma = page_anon_vma(page); /* @@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; - } else if (page->mapping) { - if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) - return -EFAULT; - } else + } else if (!vma->vm_file) { return -EFAULT; - address = __vma_address(page, vma); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { return -EFAULT; - return address; + } + + return vma_address(page, vma); } pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) @@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma, vma->vm_mm, address, - min(vma->vm_end, address + page_size(page))); + vma_address_end(page, vma)); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { @@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + if (flags & TTU_SYNC) + pvmw.flags = PVMW_SYNC; + /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; @@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ + range.end = PageKsm(page) ? + address + PAGE_SIZE : vma_address_end(page, vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, - min(vma->vm_end, address + page_size(page))); + address, range.end); if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted @@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) else rmap_walk(page, &rwc); - return !page_mapcount(page) ? true : false; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + return !page_mapcount(page); } /** @@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) @@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) diff --git a/mm/slab_common.c b/mm/slab_common.c index a4a571428c51..7cab77655f11 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size); #ifdef CONFIG_DEBUG_VM static int kmem_cache_sanity_check(const char *name, unsigned int size) { - if (!name || in_interrupt() || size < sizeof(void *) || - size > KMALLOC_MAX_SIZE) { + if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { pr_err("kmem_cache_create(%s) integrity check failed\n", name); return -EINVAL; } diff --git a/mm/slub.c b/mm/slub.c index 3f96e099817a..61bd40e3eb9a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/bit_spinlock.h> #include <linux/interrupt.h> +#include <linux/swab.h> #include <linux/bitops.h> #include <linux/slab.h> #include "slab.h" @@ -712,15 +713,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) p, p - addr, get_freepointer(s, p)); if (s->flags & SLAB_RED_ZONE) - print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, + print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, s->red_left_pad); else if (p > addr + 16) print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); - print_section(KERN_ERR, "Object ", p, + print_section(KERN_ERR, "Object ", p, min_t(unsigned int, s->object_size, PAGE_SIZE)); if (s->flags & SLAB_RED_ZONE) - print_section(KERN_ERR, "Redzone ", p + s->object_size, + print_section(KERN_ERR, "Redzone ", p + s->object_size, s->inuse - s->object_size); off = get_info_end(s); @@ -732,7 +733,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section(KERN_ERR, "Padding ", p + off, + print_section(KERN_ERR, "Padding ", p + off, size_from_object(s) - off); dump_stack(); @@ -909,11 +910,11 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { - if (!check_bytes_and_report(s, page, object, "Redzone", + if (!check_bytes_and_report(s, page, object, "Left Redzone", object - s->red_left_pad, val, s->red_left_pad)) return 0; - if (!check_bytes_and_report(s, page, object, "Redzone", + if (!check_bytes_and_report(s, page, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; } else { @@ -928,7 +929,7 @@ static int check_object(struct kmem_cache *s, struct page *page, if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && (!check_bytes_and_report(s, page, p, "Poison", p, POISON_FREE, s->object_size - 1) || - !check_bytes_and_report(s, page, p, "Poison", + !check_bytes_and_report(s, page, p, "End Poison", p + s->object_size - 1, POISON_END, 1))) return 0; /* @@ -3689,7 +3690,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) { slab_flags_t flags = s->flags; unsigned int size = s->object_size; - unsigned int freepointer_area; unsigned int order; /* @@ -3698,13 +3698,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the possible location of the free pointer. */ size = ALIGN(size, sizeof(void *)); - /* - * This is the area of the object where a freepointer can be - * safely written. If redzoning adds more to the inuse size, we - * can't use that portion for writing the freepointer, so - * s->offset must be limited within this for the general case. - */ - freepointer_area = size; #ifdef CONFIG_SLUB_DEBUG /* @@ -3730,19 +3723,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) /* * With that we have determined the number of bytes in actual use - * by the object. This is the potential offset to the free pointer. + * by the object and redzoning. */ s->inuse = size; - if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || - s->ctor)) { + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || + ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || + s->ctor) { /* * Relocate free pointer after the object if it is not * permitted to overwrite the first word of the object on * kmem_cache_free. * * This is the case if we do RCU, have a constructor or - * destructor or are poisoning the objects. + * destructor, are poisoning the objects, or are + * redzoning an object smaller than sizeof(void *). * * The assumption that s->offset >= s->inuse means free * pointer is outside of the object is used in the @@ -3751,13 +3746,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->offset = size; size += sizeof(void *); - } else if (freepointer_area > sizeof(void *)) { + } else { /* * Store freelist pointer near middle of object to keep * it away from the edges of the object to avoid small * sized over/underflows from neighboring allocations. */ - s->offset = ALIGN(freepointer_area / 2, sizeof(void *)); + s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); } #ifdef CONFIG_SLUB_DEBUG diff --git a/mm/sparse.c b/mm/sparse.c index b2ada9dc00cb..55c18aff3e42 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -344,6 +344,15 @@ size_t mem_section_usage_size(void) return sizeof(struct mem_section_usage) + usemap_size(); } +static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat) +{ +#ifndef CONFIG_NEED_MULTIPLE_NODES + return __pa_symbol(pgdat); +#else + return __pa(pgdat); +#endif +} + #ifdef CONFIG_MEMORY_HOTREMOVE static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, @@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. */ - goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); + goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: @@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid, } usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); - pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); + pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; diff --git a/mm/swapfile.c b/mm/swapfile.c index 149e77454e3c..996afa8131c8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free) static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) { - return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte); + return pte_same(pte_swp_clear_flags(pte), swp_pte); } /* diff --git a/mm/truncate.c b/mm/truncate.c index 95af244b112a..234ddd879caa 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset, * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void -truncate_cleanup_page(struct address_space *mapping, struct page *page) +static void truncate_cleanup_page(struct page *page) { - if (page_mapped(page)) { - unsigned int nr = thp_nr_pages(page); - unmap_mapping_pages(mapping, page->index, nr, false); - } + if (page_mapped(page)) + unmap_mapping_page(page); if (page_has_private(page)) do_invalidatepage(page, 0, thp_size(page)); @@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return -EIO; - truncate_cleanup_page(mapping, page); + truncate_cleanup_page(page); delete_from_page_cache(page); return 0; } @@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping, index = indices[pagevec_count(&pvec) - 1] + 1; truncate_exceptional_pvec_entries(mapping, &pvec, indices); for (i = 0; i < pagevec_count(&pvec); i++) - truncate_cleanup_page(mapping, pvec.pages[i]); + truncate_cleanup_page(pvec.pages[i]); delete_from_page_cache_batch(mapping, &pvec); for (i = 0; i < pagevec_count(&pvec); i++) unlock_page(pvec.pages[i]); @@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } + if (!did_range_unmap && page_mapped(page)) { + /* + * If page is mapped, before taking its lock, + * zap the rest of the file in one hit. + */ + unmap_mapping_pages(mapping, index, + (1 + end - index), false); + did_range_unmap = 1; + } + lock_page(page); WARN_ON(page_to_index(page) != index); if (page->mapping != mapping) { @@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } wait_on_page_writeback(page); - if (page_mapped(page)) { - if (!did_range_unmap) { - /* - * Zap the rest of the file in one hit. - */ - unmap_mapping_pages(mapping, index, - (1 + end - index), false); - did_range_unmap = 1; - } else { - /* - * Just zap this page - */ - unmap_mapping_pages(mapping, index, - 1, false); - } - } + + if (page_mapped(page)) + unmap_mapping_page(page); BUG_ON(page_mapped(page)); + ret2 = do_launder_page(mapping, page); if (ret2 == 0) { if (!invalidate_complete_page2(mapping, page)) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5c70596dd1b9..a2b732cf96ea 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -82,7 +82,7 @@ int kvm_check_cap(long cap) kvm_fd = open_kvm_dev_path_or_exit(); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" " rc: %i errno: %i", ret, errno); close(kvm_fd); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6ad6c8276b2e..af1031fed97f 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -166,75 +166,75 @@ size_t get_def_hugetlb_pagesz(void) return 0; } +#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) +#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) + const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) { - static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS; - static const int anon_huge_flags = anon_flags | MAP_HUGETLB; - static const struct vm_mem_backing_src_alias aliases[] = { [VM_MEM_SRC_ANONYMOUS] = { .name = "anonymous", - .flag = anon_flags, + .flag = ANON_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_THP] = { .name = "anonymous_thp", - .flag = anon_flags, + .flag = ANON_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB] = { .name = "anonymous_hugetlb", - .flag = anon_huge_flags, + .flag = ANON_HUGE_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = { .name = "anonymous_hugetlb_16kb", - .flag = anon_huge_flags | MAP_HUGE_16KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = { .name = "anonymous_hugetlb_64kb", - .flag = anon_huge_flags | MAP_HUGE_64KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = { .name = "anonymous_hugetlb_512kb", - .flag = anon_huge_flags | MAP_HUGE_512KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = { .name = "anonymous_hugetlb_1mb", - .flag = anon_huge_flags | MAP_HUGE_1MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = { .name = "anonymous_hugetlb_2mb", - .flag = anon_huge_flags | MAP_HUGE_2MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = { .name = "anonymous_hugetlb_8mb", - .flag = anon_huge_flags | MAP_HUGE_8MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = { .name = "anonymous_hugetlb_16mb", - .flag = anon_huge_flags | MAP_HUGE_16MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = { .name = "anonymous_hugetlb_32mb", - .flag = anon_huge_flags | MAP_HUGE_32MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = { .name = "anonymous_hugetlb_256mb", - .flag = anon_huge_flags | MAP_HUGE_256MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = { .name = "anonymous_hugetlb_512mb", - .flag = anon_huge_flags | MAP_HUGE_512MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = { .name = "anonymous_hugetlb_1gb", - .flag = anon_huge_flags | MAP_HUGE_1GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = { .name = "anonymous_hugetlb_2gb", - .flag = anon_huge_flags | MAP_HUGE_2GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = { .name = "anonymous_hugetlb_16gb", - .flag = anon_huge_flags | MAP_HUGE_16GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB, }, [VM_MEM_SRC_SHMEM] = { .name = "shmem", |