author	Huang Ying <[email protected]>	2022-07-13 16:39:52 +0800
committer	Andrew Morton <[email protected]>	2022-09-11 20:25:54 -0700
commit	c6833e10008f976a173dd5abdf992e492cbc3bcf (patch)
tree	e158909e41f32bfa54dd8f613d7b88926ffeb3d0 /include/linux
parent	33024536bafd9129f1d16ade0974671c648700ac (diff)
memory tiering: rate limit NUMA migration throughput
In NUMA balancing memory tiering mode, if there are hot pages in the slow memory node and cold pages in the fast memory node, we need to promote/demote hot/cold pages between the fast and slow memory nodes.

One choice is to promote/demote as fast as possible, but the CPU cycles and memory bandwidth consumed by high promoting/demoting throughput will hurt the latency of some workloads, because of inflated access latency and contention for the slow memory bandwidth.

A way to resolve this issue is to restrict the maximum promoting/demoting throughput. The promoting/demoting will take longer to finish, but the workload latency will be better. This patch implements that as a page promotion rate limit mechanism: the number of candidate pages to be promoted to the fast memory node via NUMA balancing is counted, and if the count exceeds the limit specified by the user, NUMA balancing promotion is stopped until the next second.

A new sysctl knob, kernel.numa_balancing_promote_rate_limit_MBps, is added for the user to specify the limit.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: "Huang, Ying" <[email protected]>
Reviewed-by: Baolin Wang <[email protected]>
Tested-by: Baolin Wang <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: osalvador <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Wei Xu <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: Zhong Jiang <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
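The check that consumes these counters lives in kernel/sched/fair.c, which this include/linux-limited diff does not show. As a rough sketch of how the new PGPROMOTE_CANDIDATE counter and pglist_data fields below can implement the per-second window, assuming a helper named numa_promotion_rate_limit() (the name and exact logic are assumptions here, not part of this diff):

/*
 * Sketch only: return true when promotion should be throttled for the
 * rest of the current one-second window, i.e. when the candidate pages
 * seen in this window already exceed rate_limit (in pages per second).
 */
static bool numa_promotion_rate_limit(struct pglist_data *pgdat,
				      unsigned long rate_limit, int nr)
{
	unsigned long nr_cand;
	unsigned int now, start;

	now = jiffies_to_msecs(jiffies);
	/* Count the pages as promotion candidates unconditionally. */
	mod_node_page_state(pgdat, PGPROMOTE_CANDIDATE, nr);
	nr_cand = node_page_state(pgdat, PGPROMOTE_CANDIDATE);
	start = pgdat->nbp_rl_start;
	/* Open a new window at most once per second; cmpxchg() lets one CPU win. */
	if (now - start > MSEC_PER_SEC &&
	    cmpxchg(&pgdat->nbp_rl_start, start, now) == start)
		pgdat->nbp_rl_nr_cand = nr_cand;
	/* Compare this window's candidates against the per-second budget. */
	return nr_cand - pgdat->nbp_rl_nr_cand >= rate_limit;
}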
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/mmzone.h	7
-rw-r--r--	include/linux/sched/sysctl.h	1
2 files changed, 8 insertions, 0 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8f571dc7c524..a0003eaa751f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -221,6 +221,7 @@ enum node_stat_item {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	PGPROMOTE_SUCCESS,	/* promote successfully */
+	PGPROMOTE_CANDIDATE,	/* candidate pages to promote */
 #endif
 	NR_VM_NODE_STAT_ITEMS
 };
@@ -998,6 +999,12 @@ typedef struct pglist_data {
 	struct deferred_split deferred_split_queue;
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+	/* start time in ms of current promote rate limit period */
+	unsigned int nbp_rl_start;
+	/* number of promote candidate pages at start time of current rate limit period */
+	unsigned long nbp_rl_nr_cand;
+#endif
 	/* Fields commonly accessed by the page reclaim scanner */
 
 	/*
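Read together, the two fields above encode the window state: nbp_rl_start is the time (in ms) the current window opened, and nbp_rl_nr_cand snapshots PGPROMOTE_CANDIDATE at that moment, so the promotion traffic attributable to the current window is a simple subtraction (same sketch assumptions as above):

/* Sketch: promotion candidates accumulated since the window opened. */
unsigned long in_window = node_page_state(pgdat, PGPROMOTE_CANDIDATE) -
			  pgdat->nbp_rl_nr_cand;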
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index e650946816d0..303ee7dd0c7e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -27,6 +27,7 @@ enum sched_tunable_scaling {
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int sysctl_numa_balancing_mode;
+extern unsigned int sysctl_numa_balancing_promote_rate_limit;
 #else
 #define sysctl_numa_balancing_mode	0
 #endif
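The knob is specified in MB/s while the window check counts pages, so the consumer has to scale it; a plausible conversion (an assumption, since the code using this extern is outside the include/linux diffstat):

/*
 * Sketch: turn the MB/s sysctl into a pages-per-second budget.
 * (20 - PAGE_SHIFT) is log2(1MB / PAGE_SIZE); with 4KB pages this
 * multiplies the value by 256.
 */
unsigned long rate_limit =
	sysctl_numa_balancing_promote_rate_limit << (20 - PAGE_SHIFT);

Under this sketch, setting kernel.numa_balancing_promote_rate_limit_MBps to 0 would throttle all promotion, while a sufficiently large value leaves promotion effectively unlimited.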