SLUB: Add support for per object memory policies

The old SLAB allocator used to support memory policies on a per-allocation basis. In SLUB the memory policies are applied on a per page frame / folio basis. Doing so avoids having to check memory policies in critical code paths for kmalloc and friends. This worked well in general on Intel/AMD/PowerPC because the interconnect technology is mature and can minimize the latencies through intelligent caching even if a small object is not placed optimally. However, on ARM we have an emergence of new NUMA interconnect technology based more on embedded devices. Caching of remote content can currently be ineffective using the standard building blocks / mesh available on that platform. Such architectures benefit if each slab object is individually placed according to memory policies and other restrictions. This patch adds another kernel parameter, slab_strict_numa. If that is set, then a static branch is activated that will cause the hotpaths of the allocator to evaluate the current memory allocation policy. Each object will be properly placed, at the price of extra processing, and SLUB will no longer defer to the page allocator to apply memory policies at the folio level. This patch improves performance of memcached running on an Ampere Altra 2P system (ARM Neoverse N1 processor) by 3.6% due to accurate placement of small kernel objects. Tested-by: Huang Shijie <[email protected]> Signed-off-by: Christoph Lameter (Ampere) <[email protected]> Signed-off-by: Vlastimil Babka <[email protected]>
author: Christoph Lameter <[email protected]> 2024-10-01 12:08:06 -0700
committer: Vlastimil Babka <[email protected]> 2024-10-29 10:43:53 +0100
commit: f7c80fad6c2b64cf73361772dbd30493879e85f4 (patch)
tree: d01eaa7cc320b722c72d1279f0ce636291b537c2
parent: 704573851b51808b45dae2d62059d1d8189138a2 (diff)
3 files changed, 62 insertions, 0 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1518343bbe22..9be54e9a55d3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6147,6 +6147,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)
 
+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]
 
 	smart2=		[HW]
diff --git a/Documentation/mm/slub.rst b/Documentation/mm/slub.rst
index 60d350d08362..84ca1dc94e5e 100644
--- a/Documentation/mm/slub.rst
+++ b/Documentation/mm/slub.rst
@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
 	``slab_max_order`` to 0, what cause minimum possible order of
 	slabs allocation.
 
+``slab_strict_numa``
+        Enables the application of memory policies on each
+        allocation. This results in more accurate placement of
+        objects which may result in the reduction of accesses
+        to remote nodes. The default is to only apply memory
+        policies at the folio level when a new folio is acquired
+        or a folio is retrieved from the lists. Enabling this
+        option reduces the fastpath performance of the slab allocator.
+
 SLUB Debug output
 =================
 
diff --git a/mm/slub.c b/mm/slub.c
index 5b832512044e..d4b1680ef17a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif		/* CONFIG_SLUB_DEBUG */
 
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -3956,6 +3960,28 @@ redo:
 	object = c->freelist;
 	slab = c->slab;
 
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If existing slab
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+					!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
@@ -5602,6 +5628,23 @@ static int __init setup_slub_min_objects(char *str)
 __setup("slab_min_objects=", setup_slub_min_objects);
 __setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+	if (nr_node_ids > 1) {
+		static_branch_enable(&strict_numa);
+		pr_info("SLUB: Strict NUMA enabled.\n");
+	} else {
+		pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+	}
+
+	return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
  * Rejects incorrectly sized objects and objects that are to be copied
author	Christoph Lameter <[email protected]>	2024-10-01 12:08:06 -0700
committer	Vlastimil Babka <[email protected]>	2024-10-29 10:43:53 +0100
commit	f7c80fad6c2b64cf73361772dbd30493879e85f4 (patch)
tree	d01eaa7cc320b722c72d1279f0ce636291b537c2
parent	704573851b51808b45dae2d62059d1d8189138a2 (diff)