diff options
author | Dan Williams <[email protected]> | 2020-10-13 16:49:28 -0700 |
---|---|---|
committer | Linus Torvalds <[email protected]> | 2020-10-13 18:38:28 -0700 |
commit | 5ccac54f3e124a49789c3773d5a351e87470cf12 (patch) | |
tree | ddee395823c58f32d4d95b525bc6dab2fd78cf72 | |
parent | a035b6bf863e5c42c2746de2a8ed6600140307e7 (diff) |
ACPI: HMAT: attach a device for each soft-reserved range
The hmem enabling in commit cf8741ac57ed ("ACPI: NUMA: HMAT: Register
"soft reserved" memory as an "hmem" device") only registered ranges to the
hmem driver for each soft-reservation that also appeared in the HMAT.
While this is meant to encourage platform firmware to "do the right thing"
and publish an HMAT, the corollary is that platforms that fail to publish
an accurate HMAT will strand memory from Linux usage. Additionally, the
"efi_fake_mem" kernel command line option enabling will strand memory by
default without an HMAT.
Arrange for "soft reserved" memory that goes unclaimed by HMAT entries to
be published as raw resource ranges for the hmem driver to consume.
Include a module parameter to disable either this fallback behavior, or
the hmat enabling from creating hmem devices. The module parameter
requires the hmem device enabling to have unique name in the module
namespace: "device_hmem".
The driver depends on the architecture providing phys_to_target_node()
which is only x86 via numa_meminfo() and arm64 via a generic memblock
implementation.
[[email protected]: require NUMA_KEEP_MEMINFO for phys_to_target_node()]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Dan Williams <[email protected]>
Signed-off-by: Joao Martins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Joao Martins <[email protected]>
Cc: Jonathan Cameron <[email protected]>
Cc: Brice Goglin <[email protected]>
Cc: Jeff Moyer <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Ben Skeggs <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dave Jiang <[email protected]>
Cc: David Airlie <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Ira Weiny <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Jia He <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Vishal Verma <[email protected]>
Cc: Wei Yang <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: Hulk Robot <[email protected]>
Cc: Jason Yan <[email protected]>
Cc: "Jérôme Glisse" <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: kernel test robot <[email protected]>
Cc: Randy Dunlap <[email protected]>
Cc: Stefano Stabellini <[email protected]>
Cc: Vivek Goyal <[email protected]>
Link: https://lkml.kernel.org/r/159643098298.4062302.17587338161136144730.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r-- | drivers/dax/Kconfig | 2 | ||||
-rw-r--r-- | drivers/dax/hmem/Makefile | 3 | ||||
-rw-r--r-- | drivers/dax/hmem/device.c | 35 |
3 files changed, 39 insertions, 1 deletions
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index a66dff78f298..567428e10b7b 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -35,6 +35,7 @@ config DEV_DAX_PMEM config DEV_DAX_HMEM tristate "HMEM DAX: direct access to 'specific purpose' memory" depends on EFI_SOFT_RESERVE + select NUMA_KEEP_MEMINFO if (NUMA && X86) default DEV_DAX help EFI 2.8 platforms, and others, may advertise 'specific purpose' @@ -49,6 +50,7 @@ config DEV_DAX_HMEM Say M if unsure. config DEV_DAX_HMEM_DEVICES + depends on NUMA_KEEP_MEMINFO # for phys_to_target_node() depends on DEV_DAX_HMEM && DAX=y def_bool y diff --git a/drivers/dax/hmem/Makefile b/drivers/dax/hmem/Makefile index a9d353d0c9ed..57377b4c3d47 100644 --- a/drivers/dax/hmem/Makefile +++ b/drivers/dax/hmem/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o -obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device.o +obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o +device_hmem-y := device.o dax_hmem-y := hmem.o diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index b9dd6b27745c..cb6401c9e9a4 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -5,6 +5,9 @@ #include <linux/dax.h> #include <linux/mm.h> +static bool nohmem; +module_param_named(disable, nohmem, bool, 0444); + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -17,6 +20,9 @@ void hmem_register_device(int target_nid, struct resource *r) struct memregion_info info; int rc, id; + if (nohmem) + return; + rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED); if (rc != REGION_INTERSECTS) @@ -63,3 +69,32 @@ out_resource: out_pdev: memregion_free(id); } + +static __init int hmem_register_one(struct resource *res, void *data) +{ + /* + * If the resource is not a top-level resource it was already + * assigned to a device by the HMAT parsing. + */ + if (res->parent != &iomem_resource) { + pr_info("HMEM: skip %pr, already claimed\n", res); + return 0; + } + + hmem_register_device(phys_to_target_node(res->start), res); + + return 0; +} + +static __init int hmem_init(void) +{ + walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, + IORESOURCE_MEM, 0, -1, NULL, hmem_register_one); + return 0; +} + +/* + * As this is a fallback for address ranges unclaimed by the ACPI HMAT + * parsing it must be at an initcall level greater than hmat_init(). + */ +late_initcall(hmem_init); |