From 4854b463c4b27c94a7de86d16ad84f235f4c1a72 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 11 Apr 2024 12:22:29 -0700 Subject: net: dql: Avoid calling BUG() when WARN() is enough If the dql_queued() function receives an invalid argument, WARN about it and continue, instead of crashing the kernel. This was raised by checkpatch while I was refactoring this code (see the following patch/commit): WARNING: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240411192241.2498631-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 5693a4be0d9a..ff9c65841ae8 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -91,7 +91,8 @@ static inline void dql_queued(struct dql *dql, unsigned int count) { unsigned long map, now, now_hi, i; - BUG_ON(count > DQL_MAX_OBJECT); + if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) + return; dql->last_obj_cnt = count; -- cgit From cbe481a1b7410b0f2f303e8fd4867ece388a9729 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 11 Apr 2024 12:22:30 -0700 Subject: net: dql: Separate queue function responsibilities The dql_queued() function currently handles both queuing object counts and populating bitmaps for reporting stalls. This commit splits the bitmap population into a separate function, allowing for conditional invocation in scenarios where the feature is disabled. This refactor maintains functionality while improving code organization.
Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240411192241.2498631-3-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 44 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index ff9c65841ae8..9980df0b7247 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -83,28 +83,11 @@ struct dql { #define DQL_MAX_OBJECT (UINT_MAX / 16) #define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) -/* - * Record number of objects queued. Assumes that caller has already checked - * availability in the queue with dql_avail. - */ -static inline void dql_queued(struct dql *dql, unsigned int count) +/* Populate the bitmap to be processed later in dql_check_stall() */ +static inline void dql_queue_stall(struct dql *dql) { unsigned long map, now, now_hi, i; - if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) - return; - - dql->last_obj_cnt = count; - - /* We want to force a write first, so that cpu do not attempt - * to get cache line containing last_obj_cnt, num_queued, adj_limit - * in Shared state, but directly does a Request For Ownership - * It is only a hint, we use barrier() only. - */ - barrier(); - - dql->num_queued += count; - now = jiffies; now_hi = now / BITS_PER_LONG; @@ -134,6 +117,29 @@ static inline void dql_queued(struct dql *dql, unsigned int count) WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } +/* + * Record number of objects queued. Assumes that caller has already checked + * availability in the queue with dql_avail. 
+ */ +static inline void dql_queued(struct dql *dql, unsigned int count) +{ + if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) + return; + + dql->last_obj_cnt = count; + + /* We want to force a write first, so that cpu do not attempt + * to get cache line containing last_obj_cnt, num_queued, adj_limit + * in Shared state, but directly does a Request For Ownership + * It is only a hint, we use barrier() only. + */ + barrier(); + + dql->num_queued += count; + + dql_queue_stall(dql); +} + /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { -- cgit From 721f076b62cb05108565adf17c27875ef5015307 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 11 Apr 2024 12:22:31 -0700 Subject: net: dql: Optimize stall information population When Dynamic Queue Limit (DQL) is set, it always populates stall information through dql_queue_stall(). However, this information is only necessary if a stall threshold (stored in struct dql->stall_thrs) is set. dql_queue_stall() is cheap, but not free, since it does have memory barriers and so forth. Do not call dql_queue_stall() if there is no stall threshold set, and save some CPU cycles. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240411192241.2498631-4-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 9980df0b7247..869afb800ea1 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -137,7 +137,9 @@ static inline void dql_queued(struct dql *dql, unsigned int count) dql->num_queued += count; - dql_queue_stall(dql); + /* Only populate stall information if the threshold is set */ + if (READ_ONCE(dql->stall_thrs)) + dql_queue_stall(dql); } /* Returns how many objects can be queued, < 0 indicates over limit.
*/ -- cgit From 4ba67ef3a1fbb7d8dc5f00de9b93a583d05b38cc Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 11 Apr 2024 12:22:32 -0700 Subject: net: dqs: make struct dql more cache efficient With the previous change, struct dql->stall_thrs will be in the hot path (at queue side), even if DQS is disabled. The other fields accessed in dql_queued() (last_obj_cnt and num_queued) are in the first cache line, so move this field (stall_thrs) to the very first cache line as well, since there is a hole there. This does not change the structure size, since it moves a short (2 bytes) into a 4-byte hole in the first cache line. This is the new structure format now: struct dql { unsigned int num_queued; unsigned int last_obj_cnt; ... short unsigned int stall_thrs; /* XXX 2 bytes hole, try to pack */ ... /* --- cacheline 1 boundary (64 bytes) --- */ ... /* Longest stall detected, reported to user */ short unsigned int stall_max; /* XXX 2 bytes hole, try to pack */ }; Also, read the stall_thrs (now in the very first cache line) earlier, together with dql->num_queued (also in the first cache line).
Suggested-by: Jakub Kicinski Suggested-by: Eric Dumazet Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240411192241.2498631-5-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 5 +++-- lib/dynamic_queue_limits.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 869afb800ea1..281298e77a15 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -50,6 +50,9 @@ struct dql { unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + unsigned long history_head; /* top 58 bits of jiffies */ /* stall entries, a bit per entry */ unsigned long history[DQL_HIST_LEN]; @@ -71,8 +74,6 @@ struct dql { unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ - /* Stall threshold (in jiffies), defined by user */ - unsigned short stall_thrs; /* Longest stall detected, reported to user */ unsigned short stall_max; unsigned long last_reap; /* Last reap (in jiffies) */ diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index a1389db1c30a..e49deddd3de9 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -15,12 +15,10 @@ #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? 
(A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) -static void dql_check_stall(struct dql *dql) +static void dql_check_stall(struct dql *dql, unsigned short stall_thrs) { - unsigned short stall_thrs; unsigned long now; - stall_thrs = READ_ONCE(dql->stall_thrs); if (!stall_thrs) return; @@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count) { unsigned int inprogress, prev_inprogress, limit; unsigned int ovlimit, completed, num_queued; + unsigned short stall_thrs; bool all_prev_completed; num_queued = READ_ONCE(dql->num_queued); + /* Read stall_thrs in advance since it belongs to the same (first) + * cache line as ->num_queued. This way, dql_check_stall() does not + * need to touch the first cache line again later, reducing the window + * of possible false sharing. + */ + stall_thrs = READ_ONCE(dql->stall_thrs); /* Can't complete more than what's in queue */ BUG_ON(count > num_queued - dql->num_completed); @@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->num_completed = completed; dql->prev_num_queued = num_queued; - dql_check_stall(dql); + dql_check_stall(dql, stall_thrs); } EXPORT_SYMBOL(dql_completed); -- cgit