author	David S. Miller <[email protected]>	2022-01-31 15:05:25 +0000
committer	David S. Miller <[email protected]>	2022-01-31 15:05:25 +0000
commit	01b2a995156d11166da00ce254d59bd7f7cefb92 (patch)
tree	7211b0a22e1a7011c1210fbfad9a0695f5a9bd93 /include
parent	678dfd5280341d877ca646499bfdc82a3d8b4356 (diff)
parent	cb6cd2cec799356e5e2f75a8591894599a6ad49d (diff)
Merge branch 'hash-rethink'
Akhmat Karakotov says:

====================
Make hash rethink configurable

As shown in the report by Alexander Azimov, hash rethink at the client
side may lead to connection timeouts toward stateful anycast services.
Tom Herbert created a patchset to address this issue by applying hash
rethink only after a negative routing event (3 RTOs) [1]. This change
also affects server-side behavior, which we found undesirable.

This patchset changes the defaults to make them safe: hash rethink is
disabled at the client side and enabled at the server side upon each RTO
event or in case of duplicate acknowledgments.

This patchset provides two options to change the default behavior. Hash
rethink may be disabled at the server side by the new sysctl option.
Changes in the sysctl option don't affect the default behavior at the
client side. Hash rethink can also be enabled/disabled with a socket
option or bpf syscalls, which overwrite both the default and the sysctl
settings. This socket option is available on both the client and server
side. This should provide a mechanism to enable hash rethink inside an
administrative domain, such as a DC, where hash rethink at the client
side can be desirable.

[1] https://lore.kernel.org/netdev/[email protected]/

v2:
- Changed sysctl default to ENABLED in all patches. Reduced sysctl and
  socket option size to u8. Fixed netns bug reported by kernel test
  robot.

v3:
- Fixed bug with bad u8 comparison. Moved sk_txrehash to use fewer bytes
  in the struct. Added WRITE_ONCE() in setsockopt and READ_ONCE() in
  tcp_rtx_synack.

v4:
- Rebase and add documentation for the sysctl option.

v5:
- Move sk_txrehash out of the busy poll ifdef.
====================

Signed-off-by: David S. Miller <[email protected]>
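[ Usage sketch, not part of this series: a minimal client-side example of
  the new per-socket control. It assumes uapi headers that already carry
  the SO_TXREHASH and SOCK_TXREHASH_* definitions added in the diff below;
  the fallback defines simply mirror those values. ]

/* Opt a client socket out of TX hash rethink before connecting. */
#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_TXREHASH
#define SO_TXREHASH 74			/* include/uapi/asm-generic/socket.h */
#endif
#ifndef SOCK_TXREHASH_DISABLED
#define SOCK_TXREHASH_DISABLED 0	/* include/uapi/linux/socket.h */
#define SOCK_TXREHASH_ENABLED  1
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = SOCK_TXREHASH_DISABLED;

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Per-socket override: takes precedence over both the built-in
	 * default and the netns sysctl default. */
	if (setsockopt(fd, SOL_SOCKET, SO_TXREHASH, &val, sizeof(val)) < 0)
		perror("setsockopt(SO_TXREHASH)");

	/* ... connect() and use the socket as usual ... */
	return 0;
}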
Diffstat (limited to 'include')
-rw-r--r--	include/net/netns/core.h	1
-rw-r--r--	include/net/sock.h	28
-rw-r--r--	include/uapi/asm-generic/socket.h	2
-rw-r--r--	include/uapi/linux/socket.h	4
4 files changed, 22 insertions, 13 deletions
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 552bc25b1933..388244e315e7 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -10,6 +10,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
struct prot_inuse __percpu *prot_inuse;
diff --git a/include/net/sock.h b/include/net/sock.h
index ff9b508d9c5f..d6c13f0fba40 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -316,6 +316,7 @@ struct sk_filter;
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_txhash: computed flow hash for use on transmit
+ * @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
@@ -491,6 +492,7 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
+ u8 sk_txrehash;
#ifdef CONFIG_NET_RX_BUSY_POLL
u8 sk_prefer_busy_poll;
u16 sk_busy_poll_budget;
@@ -587,6 +589,18 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk)
__tmp | SK_USER_DATA_NOCOPY); \
})
+static inline
+struct net *sock_net(const struct sock *sk)
+{
+ return read_pnet(&sk->sk_net);
+}
+
+static inline
+void sock_net_set(struct sock *sk, struct net *net)
+{
+ write_pnet(&sk->sk_net, net);
+}
+
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
* or not whether his port will be reused by someone else. SK_FORCE_REUSE
@@ -2054,7 +2068,7 @@ static inline void sk_set_txhash(struct sock *sk)
static inline bool sk_rethink_txhash(struct sock *sk)
{
- if (sk->sk_txhash) {
+ if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) {
sk_set_txhash(sk);
return true;
}
@@ -2704,18 +2718,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-static inline
-struct net *sock_net(const struct sock *sk)
-{
- return read_pnet(&sk->sk_net);
-}
-
-static inline
-void sock_net_set(struct sock *sk, struct net *net)
-{
- write_pnet(&sk->sk_net, net);
-}
-
static inline bool
skb_sk_is_prefetched(struct sk_buff *skb)
{
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c77a1313b3b0..467ca2f28760 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -128,6 +128,8 @@
#define SO_RESERVE_MEM 73
+#define SO_TXREHASH 74
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index eb0a9a5b6e71..51d6bb2f6765 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -31,4 +31,8 @@ struct __kernel_sockaddr_storage {
#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+#define SOCK_TXREHASH_DEFAULT ((u8)-1)
+#define SOCK_TXREHASH_DISABLED 0
+#define SOCK_TXREHASH_ENABLED 1
+
#endif /* _UAPI_LINUX_SOCKET_H */
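[ Read-back sketch, illustrative only: this assumes the kernel side of the
  series also reports the value through getsockopt(), so the returned
  integer maps to the SOCK_TXREHASH_* values defined above. ]

/* Query the effective per-socket TX rehash setting. */
#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_TXREHASH
#define SO_TXREHASH 74
#endif

static int get_txrehash(int fd)
{
	int val = -1;
	socklen_t len = sizeof(val);

	if (getsockopt(fd, SOL_SOCKET, SO_TXREHASH, &val, &len) < 0) {
		perror("getsockopt(SO_TXREHASH)");
		return -1;
	}
	/* 0 == SOCK_TXREHASH_DISABLED, 1 == SOCK_TXREHASH_ENABLED */
	return val;
}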