diff options
| author | Ingo Molnar <[email protected]> | 2024-02-14 10:45:07 +0100 |
|---|---|---|
| committer | Ingo Molnar <[email protected]> | 2024-02-14 10:45:07 +0100 |
| commit | 03c11eb3b16dc0058589751dfd91f254be2be613 (patch) | |
| tree | e5f2889212fec0bb0babdce9abd781ab487e246a /include/net | |
| parent | de8c6a352131f642b82474abe0cbb5dd26a7e081 (diff) | |
| parent | 841c35169323cd833294798e58b9bf63fa4fa1de (diff) | |
Merge tag 'v6.8-rc4' into x86/percpu, to resolve conflicts and refresh the branch
Conflicts:
arch/x86/include/asm/percpu.h
arch/x86/include/asm/text-patching.h
Signed-off-by: Ingo Molnar <[email protected]>
Diffstat (limited to 'include/net')
89 files changed, 3028 insertions, 919 deletions
diff --git a/include/net/Space.h b/include/net/Space.h index c29f3d51c078..ef42629f4258 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -10,4 +10,3 @@ struct net_device *smc_init(int unit); struct net_device *cs89x0_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); -struct net_device *cops_probe(int unit); diff --git a/include/net/act_api.h b/include/net/act_api.h index 4ae0580b63ca..e1e5e72b901e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -137,6 +137,7 @@ struct tc_action_ops { #ifdef CONFIG_NET_CLS_ACT +#define ACT_P_BOUND 0 #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 @@ -191,7 +192,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); -void tcf_idr_insert_many(struct tc_action *actions[]); +void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); @@ -207,8 +208,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); -struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, - bool rtnl_held, +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 82da55101b5a..61ebe723ee4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -31,17 +31,22 @@ struct prefix_info { __u8 length; __u8 prefix_len; + union __packed { + __u8 flags; + struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) - __u8 onlink : 1, + __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix <asm/byteorder.h>" #endif + }; + }; __be32 valid; __be32 prefered; __be32 reserved2; @@ -49,6 +54,9 @@ struct prefix_info { struct in6_addr prefix; }; +/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ +static_assert(sizeof(struct prefix_info) == 32); + #include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 5531dd08061e..0754c463224a 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +struct rxrpc_peer; enum rxrpc_abort_reason; enum rxrpc_interruptibility { @@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *, rxrpc_notify_new_call_t, rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, - struct sockaddr_rxrpc *srx, + struct rxrpc_peer *peer, struct key *key, unsigned long user_call_ID, s64 tx_total_len, u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, + u16 service_id, bool upgrade, enum rxrpc_interruptibility interruptibility, unsigned int debug_id); @@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call); void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call); -void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *); -bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *); +struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, + struct sockaddr_rxrpc *srx, gfp_t gfp); +void rxrpc_kernel_put_peer(struct rxrpc_peer *peer); +struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer); +struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call); +const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer); +const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer); +unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 824c258143a3..afd40dce40f3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -46,12 +46,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -75,6 +69,21 @@ struct unix_sock { }; #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) +#define unix_peer(sk) (unix_sk(sk)->peer) + +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} #define peer_wait peer_wq.wait diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index b01cf9ac2437..535701efc1e5 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -137,7 +137,6 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); - int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, @@ -168,6 +167,7 @@ struct vsock_transport { struct vsock_transport_send_notify_data *); /* sk_lock held by the caller */ void (*notify_buffer_size)(struct vsock_sock *, u64 *); + int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); @@ -177,6 +177,9 @@ struct vsock_transport { /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); + + /* Zero-copy. */ + bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ @@ -241,4 +244,8 @@ static inline void __init vsock_bpf_build_proto(void) {} #endif +static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) +{ + return t->msgzerocopy_allow && t->msgzerocopy_allow(); +} #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index aa90adc3b2a4..7ffa8c192c3f 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, return ERR_PTR(-EFAULT); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); return skb; } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 87d92accc26e..bdee5d649cc6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1,6 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright 2023 NXP Written 2000,2001 by Maxim Krasnyansky <[email protected]> @@ -673,6 +674,8 @@ enum { #define HCI_TX_POWER_INVALID 127 #define HCI_RSSI_INVALID 127 +#define HCI_SYNC_HANDLE_INVALID 0xffff + #define HCI_ROLE_MASTER 0x00 #define HCI_ROLE_SLAVE 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e6359f7346f1..8f8dd9173714 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -189,6 +189,7 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 type; u8 val[16]; }; @@ -198,6 +199,7 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -212,6 +214,7 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; + u8 link_type; u8 val[16]; }; @@ -219,6 +222,8 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; + u8 bdaddr_type; + u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; @@ -350,7 +355,9 @@ struct hci_dev { struct list_head list; struct mutex lock; - char name[8]; + struct ida unset_handle_ida; + + const char *name; unsigned long flags; __u16 id; __u8 bus; @@ -532,7 +539,6 @@ struct hci_dev { struct work_struct tx_work; struct delayed_work le_scan_disable; - struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; @@ -950,7 +956,6 @@ void hci_inquiry_cache_flush(struct hci_dev *hdev); /* ----- HCI Connections ----- */ enum { HCI_CONN_AUTH_PEND, - HCI_CONN_REAUTH_PEND, HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, @@ -1225,11 +1230,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, continue; /* Match CIG ID if set */ - if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig) + if (cig != c->iso_qos.ucast.cig) continue; /* Match CIS ID if set */ - if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis) + if (id != c->iso_qos.ucast.cis) continue; /* Match destination address if set */ @@ -1290,8 +1295,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev, - __u8 handle) +static inline struct hci_conn * +hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1299,10 +1304,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev * rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || + c->state != state) continue; - if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) { + if (handle == c->iso_qos.bcast.big) { rcu_read_unlock(); return c; } @@ -1314,7 +1320,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev * } static inline struct hci_conn * -hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) +hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1336,6 +1342,29 @@ hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) return NULL; } +static inline struct hci_conn * +hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK || + !test_bit(HCI_CONN_PA_SYNC, &c->flags)) + continue; + + if (c->sync_handle == sync_handle) { + rcu_read_unlock(); + return c; + } + } + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { @@ -1377,6 +1406,26 @@ static inline void hci_conn_hash_list_state(struct hci_dev *hdev, rcu_read_unlock(); } +static inline void hci_conn_hash_list_flag(struct hci_dev *hdev, + hci_conn_func_t func, __u8 type, + __u8 flag, void *data) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + if (!func) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && test_bit(flag, &c->flags)) + func(c, data); + } + + rcu_read_unlock(); +} + static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1426,7 +1475,9 @@ int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role); + u8 role, u16 handle); +struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2d5fcda1bcd0..082f89531b88 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -56,7 +56,7 @@ struct hci_mon_new_index { __u8 type; __u8 bus; bdaddr_t bdaddr; - char name[8]; + char name[8] __nonstring; } __packed; #define HCI_MON_NEW_INDEX_SIZE 16 diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 57eeb07aeb25..6efbc2152146 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -80,6 +80,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); +int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); + int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3a4b684f89bf..2b54fdd8ca15 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -52,7 +52,7 @@ * such wiphy can have zero, one, or many virtual interfaces associated with * it, which need to be identified as such by pointing the network interface's * @ieee80211_ptr pointer to a &struct wireless_dev which further describes - * the wireless part of the interface, normally this struct is embedded in the + * the wireless part of the interface. Normally this struct is embedded in the * network interface's private data area. Drivers can optionally allow creating * or destroying virtual interfaces on the fly, but without at least one or the * ability to create some the wireless device isn't useful. @@ -76,6 +76,8 @@ struct wiphy; * @IEEE80211_CHAN_DISABLED: This channel is disabled. * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes * sending probe requests or beaconing. + * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this + * channel. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. @@ -115,11 +117,16 @@ struct wiphy; * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. + * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT + * @IEEE80211_CHAN_NO_UHB_VLP_CLIENT: Client connection with VLP AP + * not permitted using this channel + * @IEEE80211_CHAN_NO_UHB_AFC_CLIENT: Client connection with AFC AP + * not permitted using this channel */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, IEEE80211_CHAN_NO_IR = 1<<1, - /* hole at 1<<2 */ + IEEE80211_CHAN_PSD = 1<<2, IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, @@ -138,6 +145,9 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_16MHZ = 1<<18, IEEE80211_CHAN_NO_320MHZ = 1<<19, IEEE80211_CHAN_NO_EHT = 1<<20, + IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, + IEEE80211_CHAN_NO_UHB_VLP_CLIENT= 1<<22, + IEEE80211_CHAN_NO_UHB_AFC_CLIENT= 1<<23, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -171,6 +181,7 @@ enum ieee80211_channel_flags { * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. + * @psd: power spectral density (in dBm) */ struct ieee80211_channel { enum nl80211_band band; @@ -187,6 +198,7 @@ struct ieee80211_channel { enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; + s8 psd; }; /** @@ -410,6 +422,19 @@ struct ieee80211_sta_eht_cap { u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ +#ifdef __CHECKER__ +/* + * This is used to mark the sband->iftype_data pointer which is supposed + * to be an array with special access semantics (per iftype), but a lot + * of code got it wrong in the past, so with this marking sparse will be + * noisy when the pointer is used directly. + */ +# define __iftd __attribute__((noderef, address_space(__iftype_data))) +#else +# define __iftd +#endif /* __CHECKER__ */ + /** * struct ieee80211_sband_iftype_data - sband data per interface type * @@ -543,10 +568,48 @@ struct ieee80211_supported_band { struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_edmg edmg_cap; u16 n_iftype_data; - const struct ieee80211_sband_iftype_data *iftype_data; + const struct ieee80211_sband_iftype_data __iftd *iftype_data; }; /** + * _ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array pointer + * @n_iftd: the length of the iftype data array + * + * Set the sband iftype data array; use this where the length cannot + * be derived from the ARRAY_SIZE() of the argument, but prefer + * ieee80211_set_sband_iftype_data() where it can be used. + */ +static inline void +_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband, + const struct ieee80211_sband_iftype_data *iftd, + u16 n_iftd) +{ + sband->iftype_data = (const void __iftd __force *)iftd; + sband->n_iftype_data = n_iftd; +} + +/** + * ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array + */ +#define ieee80211_set_sband_iftype_data(sband, iftd) \ + _ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd)) + +/** + * for_each_sband_iftype_data - iterate sband iftype data entries + * @sband: the sband whose iftype_data array to iterate + * @i: iterator counter + * @iftd: iftype data pointer to set + */ +#define for_each_sband_iftype_data(sband, i, iftd) \ + for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i]; \ + i < (sband)->n_iftype_data; \ + i++, iftd = (const void __force *)&(sband)->iftype_data[i]) + +/** * ieee80211_get_sband_iftype_data - return sband data for a given iftype * @sband: the sband to search for the STA on * @iftype: enum nl80211_iftype @@ -557,6 +620,7 @@ static inline const struct ieee80211_sband_iftype_data * ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, u8 iftype) { + const struct ieee80211_sband_iftype_data *data; int i; if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) @@ -565,10 +629,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, if (iftype == NL80211_IFTYPE_AP_VLAN) iftype = NL80211_IFTYPE_AP; - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *data = - &sband->iftype_data[i]; - + for_each_sband_iftype_data(sband, i, data) { if (data->types_mask & BIT(iftype)) return data; } @@ -924,6 +985,15 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); /** + * nl80211_chan_width_to_mhz - get the channel width in MHz + * @chan_width: the channel width from &enum nl80211_chan_width + * + * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width + * is valid. -1 otherwise. + */ +int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width); + +/** * cfg80211_chandef_valid - check if a channel definition is valid * @chandef: the channel definition to check * Return: %true if the channel definition is valid. %false otherwise. @@ -954,6 +1024,30 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, enum nl80211_iftype iftype); /** + * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we + * can/need start CAC on such channel + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Return: true if all channels available and at least + * one channel requires CAC (NL80211_DFS_USABLE) + */ +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + +/** + * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given + * channel definition + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Returns: DFS CAC time (in ms) which applies for this channel definition + */ +unsigned int +cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + +/** * nl80211_send_chandef - sends the channel definition. * @msg: the msg to send channel definition * @chandef: the channel definition to check @@ -1289,6 +1383,7 @@ struct cfg80211_acl_data { * struct cfg80211_fils_discovery - FILS discovery parameters from * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. * + * @update: Set to true if the feature configuration should be updated. * @min_interval: Minimum packet interval in TUs (0 - 10000) * @max_interval: Maximum packet interval in TUs (0 - 10000) * @tmpl_len: Template length @@ -1296,6 +1391,7 @@ struct cfg80211_acl_data { * frame headers. */ struct cfg80211_fils_discovery { + bool update; u32 min_interval; u32 max_interval; size_t tmpl_len; @@ -1306,6 +1402,7 @@ struct cfg80211_fils_discovery { * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe * response parameters in 6GHz. * + * @update: Set to true if the feature configuration should be updated. * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive * scanning @@ -1313,6 +1410,7 @@ struct cfg80211_fils_discovery { * @tmpl: Template data for probe response */ struct cfg80211_unsol_bcast_probe_resp { + bool update; u32 interval; size_t tmpl_len; const u8 *tmpl; @@ -1399,6 +1497,22 @@ struct cfg80211_ap_settings { u16 punct_bitmap; }; + +/** + * struct cfg80211_ap_update - AP configuration update + * + * Subset of &struct cfg80211_ap_settings, for updating a running AP. + * + * @beacon: beacon data + * @fils_discovery: FILS discovery transmission parameters + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + */ +struct cfg80211_ap_update { + struct cfg80211_beacon_data beacon; + struct cfg80211_fils_discovery fils_discovery; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; +}; + /** * struct cfg80211_csa_settings - channel switch settings * @@ -1568,6 +1682,21 @@ struct link_station_del_parameters { }; /** + * struct cfg80211_ttlm_params: TID to link mapping parameters + * + * Used for setting a TID to link mapping. + * + * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314 + * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. + * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314 + * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. + */ +struct cfg80211_ttlm_params { + u16 dlink[8]; + u16 ulink[8]; +}; + +/** * struct station_parameters - station parameters * * Used to change and create a new station. @@ -2346,7 +2475,7 @@ struct mesh_config { * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use - * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] + * @mcast_rate: multicast rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons * @userspace_handles_dfs: whether user space controls DFS operation, i.e. @@ -2463,7 +2592,7 @@ struct cfg80211_scan_info { * @short_ssid: short ssid to scan for * @bssid: bssid to scan for * @channel_idx: idx of the channel in the channel array in the scan request - * which the above info relvant to + * which the above info is relevant to * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU * @short_ssid_valid: @short_ssid is valid and can be used * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait @@ -2487,7 +2616,6 @@ struct cfg80211_scan_6ghz_params { * @n_ssids: number of SSIDs * @channels: channels to scan on. * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @duration: how long to listen on each channel, in TUs. If @@ -2512,12 +2640,13 @@ struct cfg80211_scan_6ghz_params { * @n_6ghz_params: number of 6 GHz params * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) + * @tsf_report_link_id: for MLO, indicates the link ID of the BSS that should be + * used for TSF reporting. Can be set to -1 to indicate no preference. */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u16 duration; @@ -2541,6 +2670,7 @@ struct cfg80211_scan_request { bool scan_6ghz; u32 n_6ghz_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params; + s8 tsf_report_link_id; /* keep last */ struct ieee80211_channel *channels[] __counted_by(n_channels); @@ -2566,7 +2696,7 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied - * for filtering out scan results received. Drivers advertize this support + * for filtering out scan results received. Drivers advertise this support * of band specific rssi based filtering through the feature capability * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band * specific rssi thresholds take precedence over rssi_thold, if specified. @@ -2612,14 +2742,13 @@ struct cfg80211_bss_select_adjust { * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @flags: control flags from &enum nl80211_scan_flags * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be passed to the host * (others are filtered out). - * If ommited, all results are passed. + * If omitted, all results are passed. * @n_match_sets: number of match sets * @report_results: indicates that results were reported for this request * @wiphy: the wiphy this was for @@ -2653,14 +2782,13 @@ struct cfg80211_bss_select_adjust { * to the specified band while deciding whether a better BSS is reported * using @relative_rssi. If delta is a negative number, the BSSs that * belong to the specified band will be penalized by delta dB in relative - * comparisions. + * comparisons. */ struct cfg80211_sched_scan_request { u64 reqid; struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u32 flags; @@ -2708,7 +2836,6 @@ enum cfg80211_signal_type { /** * struct cfg80211_inform_bss - BSS inform data * @chan: channel the frame was received on - * @scan_width: scan width that was used * @signal: signal strength value, according to the wiphy's * signal type * @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was @@ -2724,11 +2851,17 @@ enum cfg80211_signal_type { * the BSS that requested the scan in which the beacon/probe was received. * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. + * @restrict_use: restrict usage, if not set, assume @use_for is + * %NL80211_BSS_USE_FOR_NORMAL. + * @use_for: bitmap of possible usage for this BSS, see + * &enum nl80211_bss_use_for + * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, + * if @restrict_use is set and @use_for is zero (empty); may be 0 for + * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @drv_data: Data to be passed through to @inform_bss */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; - enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; u64 parent_tsf; @@ -2736,6 +2869,9 @@ struct cfg80211_inform_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 restrict_use:1, use_for:7; + u8 cannot_use_reasons; + void *drv_data; }; @@ -2762,7 +2898,6 @@ struct cfg80211_bss_ies { * for use in scan results and similar. * * @channel: channel this BSS is on - * @scan_width: width of the control channel * @bssid: BSSID of the BSS * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order @@ -2775,6 +2910,8 @@ struct cfg80211_bss_ies { * own the beacon_ies, but they're just pointers to the ones from the * @hidden_beacon_bss struct) * @proberesp_ies: the information elements from the last Probe Response frame + * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame, + * cannot rely on it having valid data * @hidden_beacon_bss: in case this BSS struct represents a probe response from * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and @@ -2788,11 +2925,15 @@ struct cfg80211_bss_ies { * @chain_signal: per-chain signal strength of last received BSS in dBm. * @bssid_index: index in the multiple BSS set * @max_bssid_indicator: max number of members in the BSS set + * @use_for: bitmap of possible usage for this BSS, see + * &enum nl80211_bss_use_for + * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, + * if @restrict_use is set and @use_for is zero (empty); may be 0 for + * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { struct ieee80211_channel *channel; - enum nl80211_bss_scan_width scan_width; const struct cfg80211_bss_ies __rcu *ies; const struct cfg80211_bss_ies __rcu *beacon_ies; @@ -2811,9 +2952,14 @@ struct cfg80211_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 proberesp_ecsa_stuck:1; + u8 bssid_index; u8 max_bssid_indicator; + u8 use_for; + u8 cannot_use_reasons; + u8 priv[] __aligned(sizeof(void *)); }; @@ -2891,12 +3037,15 @@ struct cfg80211_auth_request { * @elems_len: length of the elements * @disabled: If set this link should be included during association etc. but it * should not be used until enabled by the AP MLD. + * @error: per-link error code, must be <= 0. If there is an error, then the + * operation as a whole must fail. */ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; bool disabled; + int error; }; /** @@ -3088,8 +3237,8 @@ struct cfg80211_ibss_params { * * @behaviour: requested BSS selection behaviour. * @param: parameters for requestion behaviour. - * @band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF. - * @adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST. + * @param.band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF. + * @param.adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST. */ struct cfg80211_bss_selection { enum nl80211_bss_select_attr behaviour; @@ -3495,7 +3644,7 @@ struct cfg80211_update_ft_ies_params { * This structure provides information needed to transmit a mgmt frame * * @chan: channel to use - * @offchan: indicates wether off channel operation is required + * @offchan: indicates whether off channel operation is required * @wait: duration for ROC * @buf: buffer to transmit * @len: buffer length @@ -3613,7 +3762,7 @@ struct cfg80211_nan_func_filter { * @publish_bcast: if true, the solicited publish should be broadcasted * @subscribe_active: if true, the subscribe is active * @followup_id: the instance ID for follow up - * @followup_reqid: the requestor instance ID for follow up + * @followup_reqid: the requester instance ID for follow up * @followup_dest: MAC address of the recipient of the follow up * @ttl: time to live counter in DW. * @serv_spec_info: Service Specific Info @@ -4401,6 +4550,7 @@ struct mgmt_frame_regs { * @del_link_station: Remove a link of a station. * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. + * @set_ttlm: set the TID to link mapping. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4450,7 +4600,7 @@ struct cfg80211_ops { int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *info); + struct cfg80211_ap_update *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id); @@ -4760,6 +4910,8 @@ struct cfg80211_ops { struct link_station_del_parameters *params); int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts); + int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_ttlm_params *params); }; /* @@ -4816,6 +4968,10 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for * NL80211_REGDOM_SET_BY_DRIVER. + * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver + * set this flag to update channels on beacon hints. + * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link + * of an NSTR mobile AP MLD. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4829,7 +4985,7 @@ enum wiphy_flags { WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), - /* use hole at 12 */ + WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), WIPHY_FLAG_SUPPORTS_TDLS = BIT(15), @@ -4842,6 +4998,7 @@ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24), + WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25), }; /** @@ -5826,6 +5983,16 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); */ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); +/** + * wiphy_work_flush - flush previously queued work + * @wiphy: the wiphy, for debug purposes + * @work: the work to flush, this can be %NULL to flush all work + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work); + struct wiphy_delayed_work { struct wiphy_work work; struct wiphy *wiphy; @@ -5870,6 +6037,17 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); /** + * wiphy_delayed_work_flush - flush previously queued delayed work + * @wiphy: the wiphy, for debug purposes + * @dwork: the delayed work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork); + +/** * struct wireless_dev - wireless device state * * For netdevs, this structure must be allocated by the driver @@ -5897,7 +6075,6 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * wireless device if it has no netdev * @u: union containing data specific to @iftype * @connected: indicates if connected or not (STA mode) - * @bssid: (private) Used by the internal configuration code * @wext: (private) Used by the internal wireless extensions compat code * @wext.ibss: (private) IBSS data part of wext handling * @wext.connect: (private) connection handling data @@ -5917,10 +6094,6 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * @mgmt_registrations: list of registrations for management frames * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver - * @mtx: mutex used to lock data in this struct, may be used by drivers - * and some API functions require it held - * @beacon_interval: beacon interval used on this device for transmitting - * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @is_running: true if this is a non-netdev device that has been started, e.g. * the P2P Device. @@ -5941,6 +6114,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away + * @cqm_rssi_work: (private) CQM RSSI reporting work * @cqm_config: (private) nl80211 RSSI monitor state * @pmsr_list: (private) peer measurement requests * @pmsr_lock: (private) peer measurements requests/results lock @@ -5964,8 +6138,6 @@ struct wireless_dev { struct list_head mgmt_registrations; u8 mgmt_registrations_need_update:1; - struct mutex mtx; - bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -6013,7 +6185,8 @@ struct wireless_dev { } wext; #endif - struct cfg80211_cqm_config *cqm_config; + struct wiphy_work cqm_rssi_work; + struct cfg80211_cqm_config __rcu *cqm_config; struct list_head pmsr_list; spinlock_t pmsr_lock; @@ -6255,13 +6428,11 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, /** * ieee80211_mandatory_rates - get mandatory rates for a given band * @sband: the band to look for rates in - * @scan_width: width of the control channel * * This function returns a bitmap of the mandatory rates for the given * band, bits are set according to the rate position in the bitrates array. */ -u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, - enum nl80211_bss_scan_width scan_width); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband); /* * Radiotap parsing functions -- for controlled injection support @@ -6602,7 +6773,7 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) * @ies: data consisting of IEs * @len: length of data * - * Return: %NULL if the etended element could not be found or if + * Return: %NULL if the extended element could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match; otherwise return the * requested element struct. @@ -6749,7 +6920,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); /** * regulatory_set_wiphy_regd - set regdom info for self managed drivers * @wiphy: the wireless device we want to process the regulatory domain on - * @rd: the regulatory domain informatoin to use for this wiphy + * @rd: the regulatory domain information to use for this wiphy * * Set the regulatory domain information for self-managed wiphys, only they * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more @@ -6840,7 +7011,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy); * Regulatory self-managed driver can use it to proactively * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. - * @freq: the freqency(in MHz) to be queried. + * @freq: the frequency (in MHz) to be queried. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query @@ -6923,22 +7094,6 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, gfp_t gfp); static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width_frame(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - struct ieee80211_mgmt *mgmt, size_t len, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp); -} - -static inline struct cfg80211_bss * __must_check cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, struct ieee80211_mgmt *mgmt, size_t len, @@ -6946,7 +7101,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7021,6 +7175,23 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, enum nl80211_band band); /** + * cfg80211_ssid_eq - compare two SSIDs + * @a: first SSID + * @b: second SSID + * + * Return: %true if SSIDs are equal, %false otherwise. + */ +static inline bool +cfg80211_ssid_eq(struct cfg80211_ssid *a, struct cfg80211_ssid *b) +{ + if (WARN_ON(!a || !b)) + return false; + if (a->ssid_len != b->ssid_len) + return false; + return memcmp(a->ssid, b->ssid, a->ssid_len) ? false : true; +} + +/** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS @@ -7049,26 +7220,6 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, gfp_t gfp); static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf, - capability, beacon_interval, ie, ielen, - gfp); -} - -static inline struct cfg80211_bss * __must_check cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, enum cfg80211_bss_frame_type ftype, @@ -7078,7 +7229,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7088,6 +7238,25 @@ cfg80211_inform_bss(struct wiphy *wiphy, } /** + * __cfg80211_get_bss - get a BSS reference + * @wiphy: the wiphy this BSS struct belongs to + * @channel: the channel to search on (or %NULL) + * @bssid: the desired BSSID (or %NULL) + * @ssid: the desired SSID (or %NULL) + * @ssid_len: length of the SSID (or 0) + * @bss_type: type of BSS, see &enum ieee80211_bss_type + * @privacy: privacy filter, see &enum ieee80211_privacy + * @use_for: indicates which use is intended + */ +struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + const u8 *ssid, size_t ssid_len, + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy, + u32 use_for); + +/** * cfg80211_get_bss - get a BSS reference * @wiphy: the wiphy this BSS struct belongs to * @channel: the channel to search on (or %NULL) @@ -7096,13 +7265,20 @@ cfg80211_inform_bss(struct wiphy *wiphy, * @ssid_len: length of the SSID (or 0) * @bss_type: type of BSS, see &enum ieee80211_bss_type * @privacy: privacy filter, see &enum ieee80211_privacy + * + * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL. */ -struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *ssid, size_t ssid_len, - enum ieee80211_bss_type bss_type, - enum ieee80211_privacy privacy); +static inline struct cfg80211_bss * +cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, + const u8 *bssid, const u8 *ssid, size_t ssid_len, + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy) +{ + return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, + bss_type, privacy, + NL80211_BSS_USE_FOR_NORMAL); +} + static inline struct cfg80211_bss * cfg80211_get_ibss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -7163,19 +7339,6 @@ void cfg80211_bss_iter(struct wiphy *wiphy, void *data), void *iter_data); -static inline enum nl80211_bss_scan_width -cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef) -{ - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - return NL80211_BSS_CHAN_WIDTH_5; - case NL80211_CHAN_WIDTH_10: - return NL80211_BSS_CHAN_WIDTH_10; - default: - return NL80211_BSS_CHAN_WIDTH_20; - } -} - /** * cfg80211_rx_mlme_mgmt - notification of processed MLME management frame * @dev: network device @@ -7208,9 +7371,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); /** - * struct cfg80211_rx_assoc_resp - association response data - * @bss: the BSS that association was requested with, ownership of the pointer - * moves to cfg80211 in the call to cfg80211_rx_assoc_resp() + * struct cfg80211_rx_assoc_resp_data - association response data * @buf: (Re)Association Response frame (header + body) * @len: length of the frame data * @uapsd_queues: bitmap of queues configured for uapsd. Same format @@ -7220,10 +7381,12 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @ap_mld_addr: AP MLD address (in case of MLO) * @links: per-link information indexed by link ID, use links[0] for * non-MLO connections + * @links.bss: the BSS that association was requested with, ownership of the + * pointer moves to cfg80211 in the call to cfg80211_rx_assoc_resp() * @links.status: Set this (along with a BSS pointer) for links that * were rejected by the AP. */ -struct cfg80211_rx_assoc_resp { +struct cfg80211_rx_assoc_resp_data { const u8 *buf; size_t len; const u8 *req_ies; @@ -7231,7 +7394,7 @@ struct cfg80211_rx_assoc_resp { int uapsd_queues; const u8 *ap_mld_addr; struct { - const u8 *addr; + u8 addr[ETH_ALEN] __aligned(2); struct cfg80211_bss *bss; u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; @@ -7240,7 +7403,7 @@ struct cfg80211_rx_assoc_resp { /** * cfg80211_rx_assoc_resp - notification of processed association response * @dev: network device - * @data: association response data, &struct cfg80211_rx_assoc_resp + * @data: association response data, &struct cfg80211_rx_assoc_resp_data * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -7248,7 +7411,7 @@ struct cfg80211_rx_assoc_resp { * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp *data); + const struct cfg80211_rx_assoc_resp_data *data); /** * struct cfg80211_assoc_failure - association failure data @@ -7367,7 +7530,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, * RFkill integration in cfg80211 is almost invisible to drivers, * as cfg80211 automatically registers an rfkill instance for each * wireless device it knows about. Soft kill is also translated - * into disconnecting and turning all interfaces off, drivers are + * into disconnecting and turning all interfaces off. Drivers are * expected to turn off the device when all interfaces are down. * * However, devices may have a hard RFkill line, in which case they @@ -7415,7 +7578,7 @@ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy) * the configuration mechanism. * * A driver supporting vendor commands must register them as an array - * in struct wiphy, with handlers for each one, each command has an + * in struct wiphy, with handlers for each one. Each command has an * OUI and sub command ID to identify it. * * Note that this feature should not be (ab)used to implement protocol @@ -7579,7 +7742,7 @@ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp) * interact with driver-specific tools to aid, for instance, * factory programming. * - * This chapter describes how drivers interact with it, for more + * This chapter describes how drivers interact with it. For more * information see the nl80211 book's chapter on it. */ @@ -7967,7 +8130,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * cfg80211_port_authorized - notify cfg80211 of successful security association * * @dev: network device - * @bssid: the BSSID of the AP + * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address + * in case of AP/P2P GO * @td_bitmap: transition disable policy * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags @@ -7978,8 +8142,11 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * should be preceded with a call to cfg80211_connect_result(), * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to * indicate the 802.11 association. + * This function can also be called by AP/P2P GO driver that supports + * authentication offload. In this case the peer_mac passed is that of + * associated STA/GC. */ -void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, +void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** @@ -8568,7 +8735,7 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @link_id: the link ID for MLO, must be 0 for non-MLO * @punct_bitmap: the new puncturing bitmap * - * Caller must acquire wdev_lock, therefore must only be called from sleepable + * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, @@ -8808,6 +8975,18 @@ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, } /** + * ieee80211_fragment_element - fragment the last element in skb + * @skb: The skbuf that the element was added to + * @len_pos: Pointer to length of the element to fragment + * @frag_id: The element ID to use for fragments + * + * This function fragments all data after @len_pos, adding fragmentation + * elements with the given ID as appropriate. The SKB will grow in size + * accordingly. + */ +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); + +/** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN * @wdev: the wireless device reporting the wakeup * @wakeup: the wakeup report @@ -9056,9 +9235,9 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, /** * cfg80211_assoc_comeback - notification of association that was - * temporarly rejected with a comeback + * temporarily rejected with a comeback * @netdev: network device - * @ap_addr: AP (MLD) address that rejected the assocation + * @ap_addr: AP (MLD) address that rejected the association * @timeout: timeout interval value TUs. * * this function may sleep. the caller must hold the corresponding wdev's mutex. @@ -9222,4 +9401,60 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, */ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); +/** + * cfg80211_schedule_channels_check - schedule regulatory check if needed + * @wdev: the wireless device to check + * + * In case the device supports NO_IR or DFS relaxations, schedule regulatory + * channels check, as previous concurrent operation conditions may not + * hold anymore. + */ +void cfg80211_schedule_channels_check(struct wireless_dev *wdev); + +#ifdef CONFIG_CFG80211_DEBUGFS +/** + * wiphy_locked_debugfs_read - do a locked read in debugfs + * @wiphy: the wiphy to use + * @file: the file being read + * @buf: the buffer to fill and then read from + * @bufsize: size of the buffer + * @userbuf: the user buffer to copy to + * @count: read count + * @ppos: read position + * @handler: the read handler to call (under wiphy lock) + * @data: additional data to pass to the read handler + */ +ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data), + void *data); + +/** + * wiphy_locked_debugfs_write - do a locked write in debugfs + * @wiphy: the wiphy to use + * @file: the file being written to + * @buf: the buffer to copy the user data to + * @bufsize: size of the buffer + * @userbuf: the user buffer to copy from + * @count: read count + * @handler: the write handler to call (under wiphy lock) + * @data: additional data to pass to the write handler + */ +ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + const char __user *userbuf, size_t count, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data), + void *data); +#endif + #endif /* __NET_CFG80211_H */ diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index f79ce133e51a..cd95711b12b8 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -20,6 +20,7 @@ struct wpan_phy; struct wpan_phy_cca; struct cfg802154_scan_request; struct cfg802154_beacon_request; +struct ieee802154_addr; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL struct ieee802154_llsec_device_key; @@ -77,6 +78,12 @@ struct cfg802154_ops { struct cfg802154_beacon_request *request); int (*stop_beacons)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); + int (*associate)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord); + int (*disassociate)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL void (*get_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, @@ -304,6 +311,22 @@ struct ieee802154_coord_desc { }; /** + * struct ieee802154_pan_device - PAN device information + * @pan_id: the PAN ID of this device + * @mode: the preferred mode to reach the device + * @short_addr: the short address of this device + * @extended_addr: the extended address of this device + * @node: the list node + */ +struct ieee802154_pan_device { + __le16 pan_id; + u8 mode; + __le16 short_addr; + __le64 extended_addr; + struct list_head node; +}; + +/** * struct cfg802154_scan_request - Scan request * * @type: type of scan to be performed @@ -478,6 +501,13 @@ struct wpan_dev { /* fallback for acknowledgment bit setting */ bool ackreq; + + /* Associations */ + struct mutex association_lock; + struct ieee802154_pan_device *parent; + struct list_head children; + unsigned int max_associations; + unsigned int nchildren; }; #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) @@ -529,4 +559,46 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) void ieee802154_configure_durations(struct wpan_phy *phy, unsigned int page, unsigned int channel); +/** + * cfg802154_device_is_associated - Checks whether we are associated to any device + * @wpan_dev: the wpan device + * @return: true if we are associated + */ +bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev); + +/** + * cfg802154_device_is_parent - Checks if a device is our coordinator + * @wpan_dev: the wpan device + * @target: the expected parent + * @return: true if @target is our coordinator + */ +bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); + +/** + * cfg802154_device_is_child - Checks whether a device is associated to us + * @wpan_dev: the wpan device + * @target: the expected child + * @return: the PAN device + */ +struct ieee802154_pan_device * +cfg802154_device_is_child(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); + +/** + * cfg802154_set_max_associations - Limit the number of future associations + * @wpan_dev: the wpan device + * @max: the maximum number of devices we accept to associate + * @return: the old maximum value + */ +unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev, + unsigned int max); + +/** + * cfg802154_get_free_short_addr - Get a free address among the known devices + * @wpan_dev: the wpan device + * @return: a random short address expectedly unused on our PAN + */ +__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev); + #endif /* __NET_CFG802154_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 29fd1b4ee654..9ac394bdfbe4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,6 +150,7 @@ struct devlink_port { struct devlink_rate *devlink_rate; struct devlink_linecard *linecard; + u32 rel_index; }; struct devlink_port_new_attrs { @@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink); struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent); @@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink); +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -1851,36 +1854,36 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_value, enum devlink_info_version_type version_type); -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); -int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len); - -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value); -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value); -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value); -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value); -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value); -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u32 value_len); +void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); +void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); +void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); +void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); +void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); + +void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value); +void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value); +void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value); +void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value); +void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value); +void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u32 value_len); struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, @@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink); bool devlink_is_reload_failed(const struct devlink *devlink); void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a587e83fc169..6d3a20163260 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -20,9 +20,14 @@ FN(IP_NOPROTO) \ FN(SOCKET_RCVBUFF) \ FN(PROTO_MEM) \ + FN(TCP_AUTH_HDR) \ FN(TCP_MD5NOTFOUND) \ FN(TCP_MD5UNEXPECTED) \ FN(TCP_MD5FAILURE) \ + FN(TCP_AONOTFOUND) \ + FN(TCP_AOUNEXPECTED) \ + FN(TCP_AOKEYNOTFOUND) \ + FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ FN(TCP_ZEROWINDOW) \ @@ -80,6 +85,10 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ + FN(TC_COOKIE_ERROR) \ + FN(PACKET_SOCK_ERROR) \ + FN(TC_CHAIN_NOTFOUND) \ + FN(TC_RECLASSIFY_LOOP) \ FNe(MAX) /** @@ -142,6 +151,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_PROTO_MEM, /** + * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met + * twice or set incorrectly. + */ + SKB_DROP_REASON_TCP_AUTH_HDR, + /** * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, * corresponding to LINUX_MIB_TCPMD5NOTFOUND */ @@ -157,6 +171,26 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_MD5FAILURE, /** + * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected, + * corresponding to LINUX_MIB_TCPAOREQUIRED + */ + SKB_DROP_REASON_TCP_AONOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it + * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOUNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown, + * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOKEYNOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong, + * corresponding to LINUX_MIB_TCPAOBAD + */ + SKB_DROP_REASON_TCP_AOFAILURE, + /** * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( * see LINUX_MIB_TCPBACKLOGDROP) */ @@ -346,6 +380,23 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, /** + * @SKB_DROP_REASON_TC_COOKIE_ERROR: An error occurred whilst + * processing a tc ext cookie. + */ + SKB_DROP_REASON_TC_COOKIE_ERROR, + /** + * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors + * after its filter matches an incoming packet. + */ + SKB_DROP_REASON_PACKET_SOCK_ERROR, + /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */ + SKB_DROP_REASON_TC_CHAIN_NOTFOUND, + /** + * @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded max reclassify loop + * iterations. + */ + SKB_DROP_REASON_TC_RECLASSIFY_LOOP, + /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b9c6aa27047..82135fbdb1e6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -102,11 +102,11 @@ struct dsa_device_ops { const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC - * address, in which case the DSA master would drop packets on ingress + * address, in which case the DSA conduit would drop packets on ingress * if what it understands out of the destination MAC address is not in * its RX filter. */ - bool promisc_on_master; + bool promisc_on_conduit; }; struct dsa_lag { @@ -236,12 +236,12 @@ struct dsa_bridge { }; struct dsa_port { - /* A CPU port is physically connected to a master device. - * A user port exposed to userspace has a slave device. + /* A CPU port is physically connected to a conduit device. A user port + * exposes a network device to user-space, called 'user' here. */ union { - struct net_device *master; - struct net_device *slave; + struct net_device *conduit; + struct net_device *user; }; /* Copy of the tagging protocol operations, for quicker access @@ -249,7 +249,7 @@ struct dsa_port { */ const struct dsa_device_ops *tag_ops; - /* Copies for faster access in master receive hot path */ + /* Copies for faster access in conduit receive hot path */ struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); @@ -281,9 +281,9 @@ struct dsa_port { u8 lag_tx_enabled:1; - /* Master state bits, valid only on CPU ports */ - u8 master_admin_up:1; - u8 master_oper_up:1; + /* conduit state bits, valid only on CPU ports */ + u8 conduit_admin_up:1; + u8 conduit_oper_up:1; /* Valid only on user ports */ u8 cpu_port_in_lag:1; @@ -303,7 +303,7 @@ struct dsa_port { struct list_head list; /* - * Original copy of the master netdev ethtool_ops + * Original copy of the conduit netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -452,10 +452,10 @@ struct dsa_switch { const struct dsa_switch_ops *ops; /* - * Slave mii_bus and devices for the individual ports. + * User mii_bus and devices for the individual ports. */ u32 phys_mii_mask; - struct mii_bus *slave_mii_bus; + struct mii_bus *user_mii_bus; /* Ageing Time limits in msecs */ unsigned int ageing_time_min; @@ -520,10 +520,10 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } -static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp) { - return dsa_port_is_cpu(dp) && dp->master_admin_up && - dp->master_oper_up; + return dsa_port_is_cpu(dp) && dp->conduit_admin_up && + dp->conduit_oper_up; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) @@ -713,12 +713,12 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp, return dsa_port_lag_dev_get(dp) == lag->dev; } -static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) +static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp) { if (dp->cpu_port_in_lag) return dsa_port_lag_dev_get(dp->cpu_dp); - return dp->cpu_dp->master; + return dp->cpu_dp->conduit; } static inline @@ -732,7 +732,7 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) else if (dp->hsr_dev) return dp->hsr_dev; - return dp->slave; + return dp->user; } static inline struct net_device * @@ -834,9 +834,9 @@ struct dsa_switch_ops { int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); - int (*port_change_master)(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack); + int (*port_change_conduit)(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -969,6 +969,16 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + + /* + * Notification for MAC address changes on user ports. Drivers can + * currently only veto operations. They should not use the method to + * program the hardware, since the operation is not rolled back in case + * of other errors. + */ + int (*port_set_mac_address)(struct dsa_switch *ds, int port, + const unsigned char *addr); + /* * Compatibility between device trees defining multiple CPU ports and * drivers which are not OK to use by default the numerically smallest @@ -1198,7 +1208,8 @@ struct dsa_switch_ops { * HSR integration */ int (*port_hsr_join)(struct dsa_switch *ds, int port, - struct net_device *hsr); + struct net_device *hsr, + struct netlink_ext_ack *extack); int (*port_hsr_leave)(struct dsa_switch *ds, int port, struct net_device *hsr); @@ -1222,11 +1233,11 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); /* - * DSA master tracking operations + * DSA conduit tracking operations */ - void (*master_state_change)(struct dsa_switch *ds, - const struct net_device *master, - bool operational); + void (*conduit_state_change)(struct dsa_switch *ds, + const struct net_device *conduit, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -1363,9 +1374,9 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) #endif /* CONFIG_PM_SLEEP */ #if IS_ENABLED(CONFIG_NET_DSA) -bool dsa_slave_dev_check(const struct net_device *dev); +bool dsa_user_dev_check(const struct net_device *dev); #else -static inline bool dsa_slave_dev_check(const struct net_device *dev) +static inline bool dsa_user_dev_check(const struct net_device *dev) { return false; } diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h index 361811750a54..6f384897f287 100644 --- a/include/net/dsa_stubs.h +++ b/include/net/dsa_stubs.h @@ -13,14 +13,14 @@ extern const struct dsa_stubs *dsa_stubs; struct dsa_stubs { - int (*master_hwtstamp_validate)(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack); + int (*conduit_hwtstamp_validate)(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); }; -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { if (!netdev_uses_dsa(dev)) return 0; @@ -29,18 +29,18 @@ static inline int dsa_master_hwtstamp_validate(struct net_device *dev, * netdev_uses_dsa() returns true, the dsa_core module is still * registered, and so, dsa_unregister_stubs() couldn't have run. * For netdev_uses_dsa() to start returning false, it would imply that - * dsa_master_teardown() has executed, which requires rtnl_lock(). + * dsa_conduit_teardown() has executed, which requires rtnl_lock(). */ ASSERT_RTNL(); - return dsa_stubs->master_hwtstamp_validate(dev, config, extack); + return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack); } #else -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/dst.h b/include/net/dst.h index 78884429deed..f5dfc8fb7b37 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline u32 -dst_allfrag(const struct dst_entry *dst) -{ - int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); - return ret; -} - static inline int dst_metric_locked(const struct dst_entry *dst, int metric) { @@ -392,10 +385,10 @@ static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags); struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 82da359bca03..d17855c52ef9 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -172,8 +172,7 @@ void fib_rules_unregister(struct fib_rules_ops *); int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); -int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, - u32 flags); +int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 __u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 9efa9a59e81f..314087a5e181 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -333,7 +333,7 @@ struct flow_action_entry { struct flow_action { unsigned int num_entries; - struct flow_action_entry entries[]; + struct flow_action_entry entries[] __counted_by(num_entries); }; static inline bool flow_action_has_entries(const struct flow_action *action) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e18a4c0d69ee..e61469129402 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -8,10 +8,15 @@ #define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) +/* Binding to multicast group requires %CAP_NET_ADMIN */ +#define GENL_MCAST_CAP_NET_ADMIN BIT(0) +/* Binding to multicast group requires %CAP_SYS_ADMIN */ +#define GENL_MCAST_CAP_SYS_ADMIN BIT(1) + /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family - * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @flags: GENL_MCAST_* flags */ struct genl_multicast_group { char name[GENL_NAMSIZ]; @@ -49,6 +54,9 @@ struct genl_info; * @split_ops: the split do/dump form of operation definition * @n_split_ops: number of entries in @split_ops, not that with split do/dump * ops the number of entries is not the same as number of commands + * @sock_priv_size: the size of per-socket private memory + * @sock_priv_init: the per-socket private memory initializer + * @sock_priv_destroy: the per-socket private memory destructor * * Attribute policies (the combination of @policy and @maxattr fields) * can be attached at the family level or at the operation level. @@ -82,11 +90,17 @@ struct genl_family { const struct genl_multicast_group *mcgrps; struct module *module; + size_t sock_priv_size; + void (*sock_priv_init)(void *priv); + void (*sock_priv_destroy)(void *priv); + /* private: internal use only */ /* protocol family identifier */ int id; /* starting number of multicast group IDs in this family */ unsigned int mcgrp_offset; + /* list of per-socket privs */ + struct xarray *sock_privs; }; /** @@ -296,6 +310,8 @@ static inline bool genl_info_is_ntf(const struct genl_info *info) return !info->nlhdr; } +void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk); +void *genl_sk_priv_get(struct genl_family *family, struct sock *sk); int genl_register_family(struct genl_family *family); int genl_unregister_family(const struct genl_family *family); void genl_notify(const struct genl_family *family, struct sk_buff *skb, @@ -436,6 +452,35 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) } /** + * genlmsg_multicast_netns_filtered - multicast a netlink message + * to a specific netns with filter + * function + * @family: the generic netlink family + * @net: the net namespace + * @skb: netlink message as socket buffer + * @portid: own netlink portid to avoid sending to yourself + * @group: offset of multicast group in groups array + * @flags: allocation flags + * @filter: filter function + * @filter_data: filter function private data + * + * Return: 0 on success, negative error code for failure. + */ +static inline int +genlmsg_multicast_netns_filtered(const struct genl_family *family, + struct net *net, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags, + netlink_filter_fn filter, + void *filter_data) +{ + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; + return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, + flags, filter, filter_data); +} + +/** * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @family: the generic netlink family * @net: the net namespace @@ -448,10 +493,8 @@ static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) - return -EINVAL; - group = family->mcgrp_offset + group; - return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); + return genlmsg_multicast_netns_filtered(family, net, skb, portid, + group, flags, NULL, NULL); } /** diff --git a/include/net/gro.h b/include/net/gro.h index 88644b3ca660..b435f0ddbf64 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -41,7 +41,7 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Used in ipv6_gro_receive() and foo-over-udp */ + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ u16 proto; /* Used in napi_gro_cb::free */ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 2338f8d2a8b3..925bac726a92 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -539,6 +539,12 @@ enum ieee80211_radiotap_eht_usig_common { IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080, IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_20MHZ = 0, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_40MHZ = 1, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_80MHZ = 2, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_160MHZ = 3, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_1 = 4, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_2 = 5, IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000, diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 063313df447d..4de858f9929e 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -125,6 +125,35 @@ struct ieee802154_hdr_fc { #endif }; +struct ieee802154_assoc_req_pl { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved1:1, + device_type:1, + power_source:1, + rx_on_when_idle:1, + assoc_type:1, + reserved2:1, + security_cap:1, + alloc_addr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 alloc_addr:1, + security_cap:1, + reserved2:1, + assoc_type:1, + rx_on_when_idle:1, + power_source:1, + device_type:1, + reserved1:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif +} __packed; + +struct ieee802154_assoc_resp_pl { + __le16 short_addr; + u8 status; +} __packed; + enum ieee802154_frame_version { IEEE802154_2003_STD, IEEE802154_2006_STD, @@ -140,6 +169,19 @@ enum ieee802154_addressing_mode { IEEE802154_EXTENDED_ADDRESSING, }; +enum ieee802154_association_status { + IEEE802154_ASSOCIATION_SUCCESSFUL = 0x00, + IEEE802154_PAN_AT_CAPACITY = 0x01, + IEEE802154_PAN_ACCESS_DENIED = 0x02, + IEEE802154_HOPPING_SEQUENCE_OFFSET_DUP = 0x03, + IEEE802154_FAST_ASSOCIATION_SUCCESSFUL = 0x80, +}; + +enum ieee802154_disassociation_reason { + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE = 0x1, + IEEE802154_DEVICE_WISHES_TO_LEAVE = 0x2, +}; + struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; @@ -163,6 +205,24 @@ struct ieee802154_beacon_req_frame { struct ieee802154_mac_cmd_pl mac_pl; }; +struct ieee802154_association_req_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + struct ieee802154_assoc_req_pl assoc_req_pl; +}; + +struct ieee802154_association_resp_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + struct ieee802154_assoc_resp_pl assoc_resp_pl; +}; + +struct ieee802154_disassociation_notif_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + u8 disassoc_pl; +}; + /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index c8490729b4ae..f07642264c1e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -22,10 +22,6 @@ #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 -/* prefix flags */ -#define IF_PREFIX_ONLINK 0x01 -#define IF_PREFIX_AUTOCONF 0x02 - enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, @@ -89,7 +85,7 @@ struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - struct in6_addr sl_addr[]; + struct in6_addr sl_addr[] __counted_by(sl_max); }; #define IP6_SFBLOCK 10 /* allocate this many at once */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..9ab4bf704e86 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops { struct request_sock *req_unhash, bool *own_req); u16 net_header_len; - u16 net_frag_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); @@ -114,7 +113,10 @@ struct inet_connection_sock { __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ - __u32 ato; /* Predicted tick of soft clock */ + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ + unused:4; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ @@ -325,11 +327,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 1 - static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { - inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; + inet_csk(sk)->icsk_ack.pingpong = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); } static inline void inet_csk_exit_pingpong_mode(struct sock *sk) @@ -339,7 +340,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk) static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { - return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + return inet_csk(sk)->icsk_ack.pingpong >= + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; } static inline bool inet_csk_has_ulp(const struct sock *sk) @@ -347,4 +357,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk) return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops; } +static inline void inet_init_csk_locks(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + spin_lock_init(&icsk->icsk_accept_queue.rskq_lock); + spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3ecfeadbfa06..7f1b38458743 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -88,7 +88,7 @@ struct inet_bind_bucket { unsigned short fast_sk_family; bool fast_ipv6_only; struct hlist_node node; - struct hlist_head owners; + struct hlist_head bhash2; }; struct inet_bind2_bucket { @@ -96,22 +96,17 @@ struct inet_bind2_bucket { int l3mdev; unsigned short port; #if IS_ENABLED(CONFIG_IPV6) - unsigned short family; -#endif - union { -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr v6_rcv_saddr; + unsigned short addr_type; + struct in6_addr v6_rcv_saddr; +#define rcv_saddr v6_rcv_saddr.s6_addr32[3] +#else + __be32 rcv_saddr; #endif - __be32 rcv_saddr; - }; /* Node in the bhash2 inet_bind_hashbucket chain */ struct hlist_node node; + struct hlist_node bhash_node; /* List of sockets hashed to this bucket */ struct hlist_head owners; - /* bhash has twsk in owners, but bhash2 has twsk in - * deathrow not to add a member in struct sock_common. - */ - struct hlist_head deathrow; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) @@ -241,7 +236,7 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, struct inet_bind2_bucket * inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk); void inet_bind2_bucket_destroy(struct kmem_cache *cachep, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a027..d94c242eb3ed 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -234,17 +234,13 @@ struct inet_sock { int uc_index; int mc_index; __be32 mc_addr; - struct { - __u16 lo; - __u16 hi; - } local_port_range; + u32 local_port_range; /* high << 16 | low */ struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ enum { INET_FLAGS_PKTINFO = 0, @@ -268,6 +264,16 @@ enum { INET_FLAGS_NODEFRAG = 17, INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, + INET_FLAGS_MC6_LOOP = 20, + INET_FLAGS_RECVERR6_RFC4884 = 21, + INET_FLAGS_MC6_ALL = 22, + INET_FLAGS_AUTOFLOWLABEL_SET = 23, + INET_FLAGS_AUTOFLOWLABEL = 24, + INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, + INET_FLAGS_REPFLOW = 27, + INET_FLAGS_RTALERT_ISOLATE = 28, + INET_FLAGS_SNDFLOW = 29, }; /* cmsg flags for inet */ @@ -301,11 +307,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) #define inet_assign_bit(nr, sk, val) \ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) -static inline bool sk_is_inet(struct sock *sk) -{ - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -} - /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4a8e578405cb..f28da08a37b4 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -67,20 +67,17 @@ struct inet_timewait_sock { /* And these are ours. */ unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 3, /* 3 bits hole */ + tw_usec_ts : 1, + tw_pad : 2, /* 2 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; - struct hlist_node tw_bind2_node; }; #define tw_tclass tw_tos -#define twsk_for_each_bound_bhash2(__tw, list) \ - hlist_for_each_entry(__tw, list, tw_bind2_node) - static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; diff --git a/include/net/ip.h b/include/net/ip.h index 19adacd5ece0..25cb688bdc62 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -57,6 +57,7 @@ struct inet_skb_parm { #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) #define IPSKB_NOPOLICY BIT(8) +#define IPSKB_MULTIPATH BIT(9) u16 frag_max_size; }; @@ -94,7 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm_init(ipcm); ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); - ipcm->sockc.tsflags = inet->sk.sk_tsflags; + ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; ipcm->protocol = inet->inet_num; @@ -257,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); + return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); } /* datagram.c */ @@ -348,8 +349,14 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } -void inet_get_local_port_range(const struct net *net, int *low, int *high); -void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high) +{ + u32 range = READ_ONCE(net->ipv4.ip_local_ports.range); + + *low = range & 0xffff; + *high = range >> 16; +} +bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) @@ -433,19 +440,22 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) static inline bool ip_sk_accept_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && - inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc != IP_PMTUDISC_INTERFACE && + pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; + return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || - inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, @@ -757,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c9ff23cf313e..9ba6413fd2e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -179,9 +179,6 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; - - struct hlist_node gc_link; - struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -250,6 +247,19 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; +} + static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) @@ -257,11 +267,6 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i) return false; } -static inline bool fib6_has_expires(const struct fib6_info *f6i) -{ - return f6i->fib6_flags & RTF_EXPIRES; -} - /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely @@ -383,7 +388,6 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; - struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -500,48 +504,6 @@ void fib6_gc_cleanup(void); int fib6_init(void); -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. - */ -static inline void fib6_set_expires_locked(struct fib6_info *f6i, - unsigned long expires) -{ - struct fib6_table *tb6; - - tb6 = f6i->fib6_table; - f6i->expires = expires; - if (tb6 && !fib6_has_expires(f6i)) - hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist); - f6i->fib6_flags |= RTF_EXPIRES; -} - -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the - * list of GC candidates until it is inserted into a table. - */ -static inline void fib6_set_expires(struct fib6_info *f6i, - unsigned long expires) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_set_expires_locked(f6i, expires); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - -static inline void fib6_clean_expires_locked(struct fib6_info *f6i) -{ - if (fib6_has_expires(f6i)) - hlist_del_init(&f6i->gc_link); - f6i->fib6_flags &= ~RTF_EXPIRES; - f6i->expires = 0; -} - -static inline void fib6_clean_expires(struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_clean_expires_locked(f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; @@ -642,7 +604,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net, if (!net->ipv6.fib6_rules_require_fldissect) return false; - skb_flow_dissect_flow_keys(skb, flkeys, flag); + memset(flkeys, 0, sizeof(*flkeys)); + __skb_flow_dissect(net, skb, &flow_keys_dissector, + flkeys, NULL, 0, 0, 0, flag); + fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; fl6->flowi6_proto = flkeys->basic.ip_proto; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..28b065790261 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,13 +53,12 @@ struct route_info { */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { - /* No need to bitmask because srcprefs have only 3 bits. */ - return srcprefs << 3; + return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { - return (flags >> 3) & 7; + return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) @@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a378eff827c7..d4667b7797e3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -154,9 +154,10 @@ struct fib_info { int fib_nhs; bool fib_nh_is_v6; bool nh_updated; + bool pfsrc_removed; struct nexthop *nh; struct rcu_head rcu; - struct fib_nh fib_nh[]; + struct fib_nh fib_nh[] __counted_by(fib_nhs); }; @@ -418,7 +419,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, if (!net->ipv4.fib_rules_require_fldissect) return false; - skb_flow_dissect_flow_keys(skb, flkeys, flag); + memset(flkeys, 0, sizeof(*flkeys)); + __skb_flow_dissect(net, skb, &flow_keys_dissector, + flkeys, NULL, 0, 0, 0, flag); + fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; fl4->flowi4_proto = flkeys->basic.ip_proto; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e8750b4ef7e1..2d746f4c9a0a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, return 0; } +static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, + const struct sk_buff *skb) +{ + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IPV6)) + return ip6_flowlabel((const struct ipv6hdr *)iph); + else + return 0; +} + static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { @@ -483,15 +494,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); + return; + } + + if (pkt_len < 0) { + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_aborted_errors); } else { - struct net_device_stats *err_stats = &dev->stats; - - if (pkt_len < 0) { - err_stats->tx_errors++; - err_stats->tx_aborted_errors++; - } else { - err_stats->tx_dropped++; - } + DEV_STATS_INC(dev, tx_dropped); } } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0675be0f3fa0..cf25ea21d770 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6) } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, - const struct ipv6_pinfo *np) + const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, - .tclass = np->tclass, - .dontfrag = np->dontfrag, + .tclass = inet6_sk(sk)->tclass, + .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } @@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); +bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { @@ -909,9 +909,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) - hlimit = np->mcast_hops; + hlimit = READ_ONCE(np->mcast_hops); else - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; @@ -1128,12 +1128,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); @@ -1298,15 +1292,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } -static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ + IPV6_PREFER_SRC_COA) + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { + unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; - unsigned int prefmask = ~0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | @@ -1356,20 +1351,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) return -EINVAL; } - inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + WRITE_ONCE(inet6_sk(sk)->srcprefs, + (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } -static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val) -{ - int ret; - - lock_sock(sk); - ret = __ip6_sock_set_addr_preferences(sk, val); - release_sock(sk); - return ret; -} - static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..485c39a89866 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -60,6 +60,9 @@ struct ipv6_stub { #if IS_ENABLED(CONFIG_XFRM) void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk, + struct list_head *head, + struct sk_buff *skb); int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); #endif @@ -85,6 +88,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index f9e88401d7da..8b2055d64a6b 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -80,7 +80,7 @@ struct iucv_array { u32 length; } __attribute__ ((aligned (8))); -extern struct bus_type iucv_bus; +extern const struct bus_type iucv_bus; extern struct device *iucv_root; /* @@ -489,7 +489,7 @@ struct iucv_interface { int (*path_sever)(struct iucv_path *path, u8 userdata[16]); int (*iucv_register)(struct iucv_handler *handler, int smp); void (*iucv_unregister)(struct iucv_handler *handler, int smp); - struct bus_type *bus; + const struct bus_type *bus; struct device *root; }; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 7e73f8e5e497..1d55ba7c45be 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c707358d15c..d400fe2e8668 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -79,7 +79,7 @@ * helpers for sanity checking. Drivers must ensure all work added onto the * mac80211 workqueue should be cancelled on the driver stop() callback. * - * mac80211 will flushed the workqueue upon interface removal and during + * mac80211 will flush the workqueue upon interface removal and during * suspend. * * All work performed on the mac80211 workqueue must not acquire the RTNL lock. @@ -138,7 +138,7 @@ * field to the frame RX timestamp and report the ack TX timestamp in the * ieee80211_rx_status struct. * - * Similarly, To report hardware timestamps for Timing Measurement or Fine + * Similarly, to report hardware timestamps for Timing Measurement or Fine * Timing Measurement frame TX, the driver should set the SKB's hwtstamp field * to the frame TX timestamp and report the ack RX timestamp in the * ieee80211_tx_status struct. @@ -341,6 +341,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. + * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -376,6 +377,7 @@ enum ieee80211_bss_change { BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), + BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -474,9 +476,9 @@ struct ieee80211_ba_event { /** * struct ieee80211_event - event to be sent to the driver * @type: The event itself. See &enum ieee80211_event_type. - * @rssi: relevant if &type is %RSSI_EVENT - * @mlme: relevant if &type is %AUTH_EVENT - * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT + * @u.rssi: relevant if &type is %RSSI_EVENT + * @u.mlme: relevant if &type is %AUTH_EVENT + * @u.ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT * @u:union holding the fields above */ struct ieee80211_event { @@ -539,8 +541,6 @@ struct ieee80211_fils_discovery { * @link_id: link ID, or 0 for non-MLO * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE * @uora_exists: is the UORA element advertised by AP - * @ack_enabled: indicates support to receive a multi-TID that solicits either - * ACK, BACK or both * @uora_ocw_range: UORA element's OCW Range field * @frame_time_rts_th: HE duration RTS threshold, in units of 32us * @he_support: does this BSS support HE @@ -643,9 +643,7 @@ struct ieee80211_fils_discovery { * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS - * @csa_active: marks whether a channel switch is going on. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @csa_active: marks whether a channel switch is going on. * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -653,9 +651,7 @@ struct ieee80211_fils_discovery { * path needing to access it; even though the netdev carrier will always * be off when it is %NULL there can still be races and packets could be * processed after it switches back to %NULL. - * @color_change_active: marks whether a color change is ongoing. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @color_change_active: marks whether a color change is ongoing. * @color_change_color: the bss color that will be used after the change. * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability. * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability. @@ -1082,6 +1078,11 @@ struct ieee80211_tx_rate { #define IEEE80211_MAX_TX_RETRY 31 +static inline bool ieee80211_rate_valid(struct ieee80211_tx_rate *rate) +{ + return rate->idx >= 0 && rate->count > 0; +} + static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate, u8 mcs, u8 nss) { @@ -1115,7 +1116,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * not valid if the interface is an MLD since we won't know which * link the frame will be transmitted on * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC - * @ack_frame_id: internal frame ID for TX status, used internally + * @status_data: internal data for TX status handling, assigned privately, + * see also &enum ieee80211_status_data for the internal documentation + * @status_data_idr: indicates status data is IDR allocated ID for ack frame * @tx_time_est: TX time estimate in units of 4us, used internally * @control: union part for control data * @control.rates: TX rates array to try @@ -1145,20 +1148,16 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * @ack: union part for pure ACK data * @ack.cookie: cookie for the ACK * @driver_data: array of driver_data pointers - * @ampdu_ack_len: number of acked aggregated frames. - * relevant only if IEEE80211_TX_STAT_AMPDU was set. - * @ampdu_len: number of aggregated frames. - * relevant only if IEEE80211_TX_STAT_AMPDU was set. - * @ack_signal: signal strength of the ACK frame */ struct ieee80211_tx_info { /* common information */ u32 flags; u32 band:3, - ack_frame_id:13, + status_data_idr:1, + status_data:13, hw_queue:4, tx_time_est:10; - /* 2 free bits */ + /* 1 free bit */ union { struct { @@ -1172,7 +1171,11 @@ struct ieee80211_tx_info { u8 use_cts_prot:1; u8 short_preamble:1; u8 skip_table:1; - /* 2 bytes free */ + + /* for injection only (bitmap) */ + u8 antennas:2; + + /* 14 bits free */ }; /* only needed before rate control */ unsigned long jiffies; @@ -1352,6 +1355,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on * the frame. + * @RX_FLAG_MACTIME: The timestamp passed in the RX status (@mactime + * field) is valid if this field is non-zero, and the position + * where the timestamp was sampled depends on the value. * @RX_FLAG_MACTIME_START: The timestamp passed in the RX status (@mactime * field) is valid and contains the time the first symbol of the MPDU * was received. This is useful in monitor mode and for proper IBSS @@ -1361,6 +1367,11 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * (including FCS) was received. * @RX_FLAG_MACTIME_PLCP_START: The timestamp passed in the RX status (@mactime * field) is valid and contains the time the SYNC preamble was received. + * @RX_FLAG_MACTIME_IS_RTAP_TS64: The timestamp passed in the RX status @mactime + * is only for use in the radiotap timestamp header, not otherwise a valid + * @mactime value. Note this is a separate flag so that we continue to see + * %RX_FLAG_MACTIME as unset. Also note that in this case the timestamp is + * reported to be 64 bits wide, not just 32. * @RX_FLAG_NO_SIGNAL_VAL: The signal strength value is not present. * Valid only for data frames (mainly A-MPDU) * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference @@ -1431,12 +1442,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), RX_FLAG_DECRYPTED = BIT(1), - RX_FLAG_MACTIME_PLCP_START = BIT(2), + RX_FLAG_ONLY_MONITOR = BIT(2), RX_FLAG_MMIC_STRIPPED = BIT(3), RX_FLAG_IV_STRIPPED = BIT(4), RX_FLAG_FAILED_FCS_CRC = BIT(5), RX_FLAG_FAILED_PLCP_CRC = BIT(6), - RX_FLAG_MACTIME_START = BIT(7), + RX_FLAG_MACTIME_IS_RTAP_TS64 = BIT(7), RX_FLAG_NO_SIGNAL_VAL = BIT(8), RX_FLAG_AMPDU_DETAILS = BIT(9), RX_FLAG_PN_VALIDATED = BIT(10), @@ -1445,8 +1456,10 @@ enum mac80211_rx_flags { RX_FLAG_AMPDU_IS_LAST = BIT(13), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14), RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15), - RX_FLAG_MACTIME_END = BIT(16), - RX_FLAG_ONLY_MONITOR = BIT(17), + RX_FLAG_MACTIME = BIT(16) | BIT(17), + RX_FLAG_MACTIME_PLCP_START = 1 << 16, + RX_FLAG_MACTIME_START = 2 << 16, + RX_FLAG_MACTIME_END = 3 << 16, RX_FLAG_SKIP_MONITOR = BIT(18), RX_FLAG_AMSDU_MORE = BIT(19), RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20), @@ -1757,15 +1770,15 @@ struct ieee80211_channel_switch { * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes * and send P2P_PS notification to the driver if NOA changed, even * this is not pure P2P vif. - * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of - * SMPS mode via debugfs. + * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is + * enabled for the interface. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), - IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4), + IEEE80211_VIF_EML_ACTIVE = BIT(4), }; @@ -1938,7 +1951,7 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif) for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ if ((!(vif)->active_links || \ (vif)->active_links & BIT(link_id)) && \ - (link = rcu_dereference((vif)->link_conf[link_id]))) + (link = link_conf_dereference_check(vif, link_id))) static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { @@ -1971,22 +1984,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); */ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); -/** - * lockdep_vif_mutex_held - for lockdep checks on link poiners - * @vif: the interface to check - */ -static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) +static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) { - return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx); + return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->wiphy->mtx); } #define link_conf_dereference_protected(vif, link_id) \ rcu_dereference_protected((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) #define link_conf_dereference_check(vif, link_id) \ rcu_dereference_check((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) /** * enum ieee80211_key_flags - key flags @@ -2393,7 +2402,7 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ if ((!(vif)->active_links || \ (vif)->active_links & BIT(link_id)) && \ - ((link_sta) = link_sta_dereference_protected(sta, link_id))) + ((link_sta) = link_sta_dereference_check(sta, link_id))) /** * enum sta_notify_cmd - sta notify command @@ -2680,6 +2689,9 @@ struct ieee80211_txq { * @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting * multicast frames on all links, mac80211 should not do that. * + * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT + * and connecting with a lower bandwidth instead + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2737,6 +2749,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP, IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, + IEEE80211_HW_DISALLOW_PUNCTURING, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2825,8 +2838,6 @@ enum ieee80211_hw_flags { * the default is _GI | _BANDWIDTH. * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values. * - * @radiotap_he: HE radiotap validity flags - * * @radiotap_timestamp: Information for the radiotap timestamp field; if the * @units_pos member is set to a non-negative value then the timestamp * field will be added and populated from the &struct ieee80211_rx_status @@ -3056,7 +3067,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The set_key() call for the %SET_KEY command should return 0 if * the key is now in use, -%EOPNOTSUPP or -%ENOSPC if it couldn't be * added; if you return 0 then hw_key_idx must be assigned to the - * hardware key index, you are free to use the full u8 range. + * hardware key index. You are free to use the full u8 range. * * Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is * set, mac80211 will not automatically fall back to software crypto if @@ -3066,7 +3077,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * When the cmd is %DISABLE_KEY then it must succeed. * * Note that it is permissible to not decrypt a frame even if a key - * for it has been uploaded to hardware, the stack will not make any + * for it has been uploaded to hardware. The stack will not make any * decision based on whether a key has been uploaded or not but rather * based on the receive flags. * @@ -3081,7 +3092,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The update_tkip_key() call updates the driver with the new phase 1 key. * This happens every time the iv16 wraps around (every 65536 packets). The * set_key() call will happen only once for each key (unless the AP did - * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is + * rekeying); it will not include a valid phase 1 key. The valid phase 1 key is * provided by update_tkip_key only. The trigger that makes mac80211 call this * handler is software decryption with wrap around of iv16. * @@ -3108,7 +3119,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * * mac80211 has support for various powersave implementations. * - * First, it can support hardware that handles all powersaving by itself, + * First, it can support hardware that handles all powersaving by itself; * such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware * flag. In that case, it will be told about the desired powersave mode * with the %IEEE80211_CONF_PS flag depending on the association status. @@ -3133,12 +3144,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still * required to pass up beacons. The hardware is still required to handle * waking up for multicast traffic; if it cannot the driver must handle that - * as best as it can, mac80211 is too slow to do that. + * as best as it can; mac80211 is too slow to do that. * * Dynamic powersave is an extension to normal powersave in which the * hardware stays awake for a user-specified period of time after sending a * frame so that reply frames need not be buffered and therefore delayed to - * the next wakeup. It's compromise of getting good enough latency when + * the next wakeup. It's a compromise of getting good enough latency when * there's data traffic and still saving significantly power in idle * periods. * @@ -3203,7 +3214,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Note that change, for the sake of simplification, also includes information * elements appearing or disappearing from the beacon. * - * Some hardware supports an "ignore list" instead, just make sure nothing + * Some hardware supports an "ignore list" instead. Just make sure nothing * that was requested is on the ignore list, and include commonly changing * information element IDs in the ignore list, for example 11 (BSS load) and * the various vendor-assigned IEs with unknown contents (128, 129, 133-136, @@ -3214,7 +3225,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * In addition to these capabilities, hardware should support notifying the * host of changes in the beacon RSSI. This is relevant to implement roaming * when no traffic is flowing (when traffic is flowing we see the RSSI of - * the received data packets). This can consist in notifying the host when + * the received data packets). This can consist of notifying the host when * the RSSI changes significantly or when it drops below or rises above * configurable thresholds. In the future these thresholds will also be * configured by mac80211 (which gets them from userspace) to implement @@ -3361,8 +3372,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * period starts for any reason, @release_buffered_frames is called * with the number of frames to be released and which TIDs they are * to come from. In this case, the driver is responsible for setting - * the EOSP (for uAPSD) and MORE_DATA bits in the released frames, - * to help the @more_data parameter is passed to tell the driver if + * the EOSP (for uAPSD) and MORE_DATA bits in the released frames. + * To help the @more_data parameter is passed to tell the driver if * there is more data on other TIDs -- the TIDs to release frames * from are ignored since mac80211 doesn't know how many frames the * buffers for those TIDs contain. @@ -3411,7 +3422,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Additionally, the driver has to then use these HW queue IDs for the queue * management functions (ieee80211_stop_queue() et al.) * - * The driver is free to set up the queue mappings as needed, multiple virtual + * The driver is free to set up the queue mappings as needed; multiple virtual * interfaces may map to the same hardware queues if needed. The setup has to * happen during add_interface or change_interface callbacks. For example, a * driver supporting station+station and station+AP modes might decide to have @@ -3635,11 +3646,14 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @link_id: the link id on which the frame will be TX'ed. + * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; u8 success:1; + int link_id; }; /** @@ -3863,6 +3877,10 @@ struct ieee80211_prep_tx_info { * the station. See @sta_pre_rcu_remove if needed. * This callback can sleep. * + * @vif_add_debugfs: Drivers can use this callback to add a debugfs vif + * directory with its files. This callback should be within a + * CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. + * * @link_add_debugfs: Drivers can use this callback to add debugfs files * when a link is added to a mac80211 vif. This callback should be within * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. @@ -4067,11 +4085,15 @@ struct ieee80211_prep_tx_info { * This callback must be atomic. * * @get_et_sset_count: Ethtool API to get string-set count. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @get_et_stats: Ethtool API to get a set of u64 stats. * * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @mgd_prepare_tx: Prepare for transmitting a management frame for association * before associated. In multi-channel scenarios, a virtual interface is @@ -4250,6 +4272,8 @@ struct ieee80211_prep_tx_info { * disable background CAC/radar detection. * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to * resolve a path for hardware flow offloading + * @can_activate_links: Checks if a specific active_links bitmap is + * supported by the driver. * @change_vif_links: Change the valid links on an interface, note that while * removing the old link information is still valid (link_conf pointer), * but may immediately disappear after the function returns. The old or @@ -4358,6 +4382,8 @@ struct ieee80211_ops { int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); #ifdef CONFIG_MAC80211_DEBUGFS + void (*vif_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); void (*link_add_debugfs)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, @@ -4506,7 +4532,8 @@ struct ieee80211_ops { struct ieee80211_prep_tx_info *info); void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); int (*add_chanctx)(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); @@ -4544,7 +4571,8 @@ struct ieee80211_ops { struct ieee80211_channel_switch *ch_switch); int (*post_channel_switch)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void (*abort_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, @@ -4626,6 +4654,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, struct net_device_path_ctx *ctx, struct net_device_path *path); + bool (*can_activate_links)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 active_links); int (*change_vif_links)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 old_links, u16 new_links, @@ -4890,7 +4921,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled and RCU read lock * @@ -4915,7 +4946,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled. * @@ -4940,7 +4971,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * In process context use instead ieee80211_rx_ni(). * @@ -4960,7 +4991,7 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) * * Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not * be mixed for a single hardware.Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -4975,7 +5006,7 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); * * Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may * not be mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -5151,7 +5182,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, struct ieee80211_tx_info *info); /** - * ieee80211_tx_status - transmit status callback + * ieee80211_tx_status_skb - transmit status callback * * Call this function for all transmitted frames after they have been * transmitted. It is permissible to not call this function for @@ -5166,13 +5197,13 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call */ -void ieee80211_tx_status(struct ieee80211_hw *hw, - struct sk_buff *skb); +void ieee80211_tx_status_skb(struct ieee80211_hw *hw, + struct sk_buff *skb); /** * ieee80211_tx_status_ext - extended transmit status callback * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that may want to provide extra information that does not * fit into &struct ieee80211_tx_info. * @@ -5189,7 +5220,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /** * ieee80211_tx_status_noskb - transmit status callback without skb * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that cannot reliably map tx status information back to * specific skbs. * @@ -5217,9 +5248,9 @@ static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, /** * ieee80211_tx_status_ni - transmit status callback (in process context) * - * Like ieee80211_tx_status() but can be called in process context. + * Like ieee80211_tx_status_skb() but can be called in process context. * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_irqsafe() may not be mixed * for a single hardware. * @@ -5230,17 +5261,17 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, struct sk_buff *skb) { local_bh_disable(); - ieee80211_tx_status(hw, skb); + ieee80211_tx_status_skb(hw, skb); local_bh_enable(); } /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * - * Like ieee80211_tx_status() but can be called in IRQ context + * Like ieee80211_tx_status_skb() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_ni() may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by @@ -5788,12 +5819,11 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_remove_key - remove the given key * @keyconf: the parameter passed with the set key * + * Context: Must be called with the wiphy mutex held. + * * Remove the given key. If the key was uploaded to the hardware at the * time this function is called, it is not deleted in the hardware but * instead assumed to have been removed already. - * - * Note that due to locking considerations this function can (currently) - * only be called during key iteration (ieee80211_iter_keys().) */ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); @@ -6347,12 +6377,12 @@ ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq); * @iter: iterator function that will be called for each key * @iter_data: custom data to pass to the iterator function * + * Context: Must be called with wiphy mutex held; can sleep. + * * This function can be used to iterate all the keys known to * mac80211, even those that weren't previously programmed into * the device. This is intended for use in WoWLAN if the device - * needs reprogramming of the keys during suspend. Note that due - * to locking reasons, it is also only safe to call this at few - * spots since it must hold the RTNL and be able to sleep. + * needs reprogramming of the keys during suspend. * * The order in which the keys are iterated matches the order * in which they were originally installed and handed to the @@ -6542,11 +6572,14 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); * ieee80211_chswitch_done - Complete channel switch process * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @success: make the channel switch successful or not + * @link_id: the link_id on which the switch was done. Ignored if success is + * false. * * Complete the channel switch post-process: set the new operational channel * and wake up the suspended queues. */ -void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, + unsigned int link_id); /** * ieee80211_channel_switch_disconnect - disconnect due to channel switch error @@ -7242,7 +7275,7 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, * * This function is used to check whether given txq is allowed to transmit by * the airtime scheduler, and can be used by drivers to access the airtime - * fairness accounting without going using the scheduling order enfored by + * fairness accounting without using the scheduling order enforced by * next_txq(). * * Returns %true if the airtime scheduler thinks the TXQ should be allowed to @@ -7411,6 +7444,9 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * @vif: interface to set active links on * @active_links: the new active links bitmap * + * Context: Must be called with wiphy mutex held; may sleep; calls + * back into the driver. + * * This changes the active links on an interface. The interface * must be in client mode (in AP mode, all links are always active), * and @active_links must be a subset of the vif's valid_links. @@ -7418,6 +7454,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * If a link is switched off and another is switched on at the same * time (e.g. active_links going from 0x1 to 0x10) then you will get * a sequence of calls like + * * - change_vif_links(0x11) * - unassign_vif_chanctx(link_id=0) * - change_sta_links(0x11) for each affected STA (the AP) @@ -7427,10 +7464,6 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * - change_sta_links(0x10) for each affected STA (the AP) * - assign_vif_chanctx(link_id=4) * - change_vif_links(0x10) - * - * Note: This function acquires some mac80211 locks and must not - * be called with any driver locks held that could cause a - * lock dependency inversion. Best call it without locks. */ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); diff --git a/include/net/macsec.h b/include/net/macsec.h index 75a6f4863c83..dbd22180cc5c 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -247,6 +247,23 @@ struct macsec_secy { /** * struct macsec_context - MACsec context for hardware offloading + * @netdev: a valid pointer to a struct net_device if @offload == + * MACSEC_OFFLOAD_MAC + * @phydev: a valid pointer to a struct phy_device if @offload == + * MACSEC_OFFLOAD_PHY + * @offload: MACsec offload status + * @secy: pointer to a MACsec SecY + * @rx_sc: pointer to a RX SC + * @update_pn: when updating the SA, update the next PN + * @assoc_num: association number of the target SA + * @key: key of the target SA + * @rx_sa: pointer to an RX SA if a RX SA is added/updated/removed + * @tx_sa: pointer to an TX SA if a TX SA is added/updated/removed + * @tx_sc_stats: pointer to TX SC stats structure + * @tx_sa_stats: pointer to TX SA stats structure + * @rx_sc_stats: pointer to RX SC stats structure + * @rx_sa_stats: pointer to RX SA stats structure + * @dev_stats: pointer to dev stats structure */ struct macsec_context { union { @@ -258,6 +275,7 @@ struct macsec_context { struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct { + bool update_pn; unsigned char assoc_num; u8 key[MACSEC_MAX_KEY_LEN]; union { @@ -276,6 +294,33 @@ struct macsec_context { /** * struct macsec_ops - MACsec offloading operations + * @mdo_dev_open: called when the MACsec interface transitions to the up state + * @mdo_dev_stop: called when the MACsec interface transitions to the down + * state + * @mdo_add_secy: called when a new SecY is added + * @mdo_upd_secy: called when the SecY flags are changed or the MAC address of + * the MACsec interface is changed + * @mdo_del_secy: called when the hw offload is disabled or the MACsec + * interface is removed + * @mdo_add_rxsc: called when a new RX SC is added + * @mdo_upd_rxsc: called when a certain RX SC is updated + * @mdo_del_rxsc: called when a certain RX SC is removed + * @mdo_add_rxsa: called when a new RX SA is added + * @mdo_upd_rxsa: called when a certain RX SA is updated + * @mdo_del_rxsa: called when a certain RX SA is removed + * @mdo_add_txsa: called when a new TX SA is added + * @mdo_upd_txsa: called when a certain TX SA is updated + * @mdo_del_txsa: called when a certain TX SA is removed + * @mdo_get_dev_stats: called when dev stats are read + * @mdo_get_tx_sc_stats: called when TX SC stats are read + * @mdo_get_tx_sa_stats: called when TX SA stats are read + * @mdo_get_rx_sc_stats: called when RX SC stats are read + * @mdo_get_rx_sa_stats: called when RX SA stats are read + * @mdo_insert_tx_tag: called to insert the TX tag + * @needed_headroom: number of bytes reserved at the beginning of the sk_buff + * for the TX tag + * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the + * TX tag */ struct macsec_ops { /* Device wide */ @@ -302,6 +347,11 @@ struct macsec_ops { int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); + /* Offload tag */ + int (*mdo_insert_tx_tag)(struct phy_device *phydev, + struct sk_buff *skb); + unsigned int needed_headroom; + unsigned int needed_tailroom; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); @@ -324,4 +374,9 @@ static inline void *macsec_netdev_priv(const struct net_device *dev) return netdev_priv(dev); } +static inline u64 sci_to_cpu(sci_t sci) +{ + return be64_to_cpu((__force __be64)sci); +} + #endif /* _NET_MACSEC_H_ */ diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 88b6ef7ce1a6..27684135bb4d 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -66,6 +66,7 @@ enum { GDMA_DEVICE_NONE = 0, GDMA_DEVICE_HWC = 1, GDMA_DEVICE_MANA = 2, + GDMA_DEVICE_MANA_IB = 3, }; struct gdma_resource { @@ -149,6 +150,7 @@ struct gdma_general_req { #define GDMA_MESSAGE_V1 1 #define GDMA_MESSAGE_V2 2 +#define GDMA_MESSAGE_V3 3 struct gdma_general_resp { struct gdma_resp_hdr hdr; @@ -293,6 +295,7 @@ struct gdma_queue { u32 head; u32 tail; + struct list_head entry; /* Extra fields specific to EQ/CQ. */ union { @@ -328,6 +331,7 @@ struct gdma_queue_spec { void *context; unsigned long log2_throttle_limit; + unsigned int msix_index; } eq; struct { @@ -344,7 +348,9 @@ struct gdma_queue_spec { struct gdma_irq_context { void (*handler)(void *arg); - void *arg; + /* Protect the eq_list */ + spinlock_t lock; + struct list_head eq_list; char name[MANA_IRQ_NAME_SZ]; }; @@ -355,7 +361,6 @@ struct gdma_context { unsigned int max_num_queues; unsigned int max_num_msix; unsigned int num_msix_usable; - struct gdma_resource msix_resource; struct gdma_irq_context *irq_contexts; /* L2 MTU */ @@ -387,6 +392,9 @@ struct gdma_context { /* Azure network adapter */ struct gdma_dev mana; + + /* Azure RDMA adapter */ + struct gdma_dev mana_ib; }; #define MAX_NUM_GDMA_DEVICES 4 diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 3d3b5c881bc1..158b125692c2 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -121,7 +121,7 @@ struct hwc_dma_buf { u32 gpa_mkey; u32 num_reqs; - struct hwc_work_request reqs[]; + struct hwc_work_request reqs[] __counted_by(num_reqs); }; typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9f70b4332238..76147feb0d10 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -103,9 +103,10 @@ struct mana_txq { /* skb data and frags dma mappings */ struct mana_skb_head { - dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + /* GSO pkts may have 2 SGEs for the linear part*/ + dma_addr_t dma_handle[MAX_SKB_FRAGS + 2]; - u32 size[MAX_SKB_FRAGS + 1]; + u32 size[MAX_SKB_FRAGS + 2]; }; #define MANA_HEADROOM sizeof(struct mana_skb_head) @@ -338,7 +339,7 @@ struct mana_rxq { /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. */ - struct mana_recv_buf_oob rx_oobs[]; + struct mana_recv_buf_oob rx_oobs[] __counted_by(num_rx_buf); }; struct mana_tx_qp { @@ -352,6 +353,25 @@ struct mana_tx_qp { struct mana_ethtool_stats { u64 stop_queue; u64 wake_queue; + u64 hc_rx_discards_no_wqe; + u64 hc_rx_err_vport_disabled; + u64 hc_rx_bytes; + u64 hc_rx_ucast_pkts; + u64 hc_rx_ucast_bytes; + u64 hc_rx_bcast_pkts; + u64 hc_rx_bcast_bytes; + u64 hc_rx_mcast_pkts; + u64 hc_rx_mcast_bytes; + u64 hc_tx_err_gf_disabled; + u64 hc_tx_err_vport_disabled; + u64 hc_tx_err_inval_vportoffset_pkt; + u64 hc_tx_err_vlan_enforcement; + u64 hc_tx_err_eth_type_enforcement; + u64 hc_tx_err_sa_enforcement; + u64 hc_tx_err_sqpdid_enforcement; + u64 hc_tx_err_cqpdid_enforcement; + u64 hc_tx_err_mtu_violation; + u64 hc_tx_err_inval_oob; u64 hc_tx_bytes; u64 hc_tx_ucast_pkts; u64 hc_tx_ucast_bytes; @@ -359,6 +379,7 @@ struct mana_ethtool_stats { u64 hc_tx_bcast_bytes; u64 hc_tx_mcast_pkts; u64 hc_tx_mcast_bytes; + u64 hc_tx_err_gdma; u64 tx_cqe_err; u64 tx_cqe_unknown_type; u64 rx_coalesced_err; @@ -601,8 +622,8 @@ struct mana_query_gf_stat_resp { struct gdma_resp_hdr hdr; u64 reported_stats; /* rx errors/discards */ - u64 discard_rx_nowqe; - u64 err_rx_vport_disabled; + u64 rx_discards_nowqe; + u64 rx_err_vport_disabled; /* rx bytes/packets */ u64 hc_rx_bytes; u64 hc_rx_ucast_pkts; @@ -612,16 +633,16 @@ struct mana_query_gf_stat_resp { u64 hc_rx_mcast_pkts; u64 hc_rx_mcast_bytes; /* tx errors */ - u64 err_tx_gf_disabled; - u64 err_tx_vport_disabled; - u64 err_tx_inval_vport_offset_pkt; - u64 err_tx_vlan_enforcement; - u64 err_tx_ethtype_enforcement; - u64 err_tx_SA_enforecement; - u64 err_tx_SQPDID_enforcement; - u64 err_tx_CQPDID_enforcement; - u64 err_tx_mtu_violation; - u64 err_tx_inval_oob; + u64 tx_err_gf_disabled; + u64 tx_err_vport_disabled; + u64 tx_err_inval_vport_offset_pkt; + u64 tx_err_vlan_enforcement; + u64 tx_err_ethtype_enforcement; + u64 tx_err_SA_enforcement; + u64 tx_err_SQPDID_enforcement; + u64 tx_err_CQPDID_enforcement; + u64 tx_err_mtu_violation; + u64 tx_err_inval_oob; /* tx bytes/packets */ u64 hc_tx_bytes; u64 hc_tx_ucast_pkts; @@ -631,7 +652,7 @@ struct mana_query_gf_stat_resp { u64 hc_tx_mcast_pkts; u64 hc_tx_mcast_bytes; /* tx error */ - u64 err_tx_gdma; + u64 tx_err_gdma; }; /* HW DATA */ /* Configure vPort Rx Steering */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6da68886fabb..0d28172193fa 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -162,7 +162,7 @@ struct neighbour { struct rcu_head rcu; struct net_device *dev; netdevice_tracker dev_tracker; - u8 primary_key[0]; + u8 primary_key[]; } __randomize_layout; struct neigh_ops { @@ -539,7 +539,7 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - return n->output(n, skb); + return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index eb6cd43b1746..13b3a4e29fdb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -368,21 +368,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker) typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d68b0a483431..8b8ed4e13d74 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -128,7 +128,7 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue, netdev_txq_completed_mb(txq, pkts, bytes); \ \ _res = -1; \ - if (pkts && likely(get_desc > start_thrs)) { \ + if (pkts && likely(get_desc >= start_thrs)) { \ _res = 1; \ if (unlikely(netif_tx_queue_stopped(txq)) && \ !(down_cond)) { \ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index cdcafb30d437..aa1716fb0e53 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -21,6 +21,10 @@ struct netdev_rx_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; } ____cacheline_aligned_in_smp; /* diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4085765c3370..cba3ccf03fcc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct) return read_pnet(&ct->ct_net); } -/* Alter reply tuple (maybe alter helper). */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply); - /* Is this tuple taken? (ignoring any belonging to the given conntrack). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, @@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +static inline void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply) +{ + /* Must be unconfirmed, so not in hash table yet */ + if (WARN_ON(nf_ct_is_confirmed(ct))) + return; + + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; +} + #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index fcb19a4e8f2b..6903f72bcc15 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_LABELS struct net *net = nf_ct_net(ct); - if (net->ct.labels_used == 0) + if (atomic_read(&net->ct.labels_used) == 0) return NULL; return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d466e1a3b0b1..956c752ceb31 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,6 +53,7 @@ struct nf_flowtable_type { struct list_head list; int family; int (*init)(struct nf_flowtable *ft); + bool (*gc)(const struct flow_offload *flow); int (*setup)(struct nf_flowtable *ft, struct net_device *dev, enum flow_block_command cmd); @@ -61,6 +62,8 @@ struct nf_flowtable_type { enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; @@ -71,12 +74,13 @@ enum nf_flowtable_flags { }; struct nf_flowtable { - struct list_head list; - struct rhashtable rhashtable; - int priority; + unsigned int flags; /* readonly in datapath */ + int priority; /* control path (padding hole) */ + struct rhashtable rhashtable; /* datapath, read-mostly members come first */ + + struct list_head list; /* slowpath parts */ const struct nf_flowtable_type *type; struct delayed_work gc_work; - unsigned int flags; struct flow_block flow_block; struct rw_semaphore flow_block_lock; /* Guards flow_block */ possible_net_t net; @@ -239,6 +243,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, } list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; unlock: up_write(&flow_table->flow_block_lock); @@ -261,6 +270,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, WARN_ON(true); } up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); } void flow_offload_route_init(struct flow_offload *flow, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dd40c75011d2..510244cc0f8f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg) return *(__force __be32 *)sreg; } -static inline void nft_reg_store64(u32 *dreg, u64 val) +static inline void nft_reg_store64(u64 *dreg, u64 val) { - put_unaligned(val, (u64 *)dreg); + put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) @@ -205,6 +205,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number + * @flags: modifiers to new request * @family: protocol family * @level: depth of the chains * @report: notify via unicast netlink message @@ -274,11 +275,15 @@ struct nft_userdata { unsigned char data[]; }; +/* placeholder structure for opaque set element backend representation. */ +struct nft_elem_priv { }; + /** * struct nft_set_elem - generic representation of set elements * * @key: element key * @key_end: closing element key + * @data: element data * @priv: element private data and extensions */ struct nft_set_elem { @@ -294,9 +299,14 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - void *priv; + struct nft_elem_priv *priv; }; +static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) +{ + return (void *)priv; +} + struct nft_set; struct nft_set_iter { u8 genmask; @@ -306,7 +316,7 @@ struct nft_set_iter { int (*fn)(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); }; /** @@ -317,10 +327,10 @@ struct nft_set_iter { * @dtype: data type * @dlen: data length * @objtype: object type - * @flags: flags * @size: number of set elements * @policy: set policy * @gc_int: garbage collector interval + * @timeout: element timeout * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions @@ -343,9 +353,9 @@ struct nft_set_desc { /** * enum nft_set_class - performance class * - * @NFT_LOOKUP_O_1: constant, O(1) - * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N) - * @NFT_LOOKUP_O_N: linear, O(N) + * @NFT_SET_CLASS_O_1: constant, O(1) + * @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N) + * @NFT_SET_CLASS_O_N: linear, O(N) */ enum nft_set_class { NFT_SET_CLASS_O_1, @@ -414,9 +424,13 @@ struct nft_set_ext; * @remove: remove element from set * @walk: iterate over all set elements * @get: get set elements + * @commit: commit set elements + * @abort: abort set elements * @privsize: function to return size of set private data + * @estimate: estimate the required memory size and the lookup complexity class * @init: initialize private data of new set instance * @destroy: destroy private data of set instance + * @gc_init: initialize garbage collection * @elemsize: element private size * * Operations lookup, update and delete have simpler interfaces, are faster @@ -430,7 +444,8 @@ struct nft_set_ops { const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, @@ -442,27 +457,27 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext); + struct nft_elem_priv **priv); void (*activate)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); - void * (*deactivate)(const struct net *net, + struct nft_elem_priv *elem_priv); + struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, - void *priv); + struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); - void * (*get)(const struct net *net, + struct nft_elem_priv * (*get)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - void (*commit)(const struct nft_set *set); + void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); @@ -531,13 +546,16 @@ struct nft_set_elem_expr { * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length * @udata: user data - * @expr: stateful expression + * @pending_update: list of pending update set element * @ops: set ops * @flags: set flags * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length + * @num_exprs: numbers of exprs + * @exprs: stateful expression + * @catchall_list: list of catch-all set element * @data: private set data */ struct nft_set { @@ -683,6 +701,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[]; * * @len: length of extension area * @offset: offsets of individual extension types + * @ext_len: length of the expected extension(used to sanity check) */ struct nft_set_ext_tmpl { u16 len; @@ -789,16 +808,22 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } -static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, + u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return __nft_set_elem_expired(ext, get_jiffies_64()); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, - void *elem) + const struct nft_elem_priv *elem_priv) { - return elem + set->ops->elemsize; + return (void *)elem_priv + set->ops->elemsize; } static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) @@ -810,16 +835,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr); -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, const u32 *data, - u64 timeout, u64 expiration, gfp_t gfp); +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr); void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem); + const struct nft_set *set, + const struct nft_elem_priv *elem_priv); struct nft_expr_ops; /** @@ -828,6 +856,7 @@ struct nft_expr_ops; * @select_ops: function to select nft_expr_ops * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present + * @inner_ops: inner ops, used for inner packet operation * @list: used internally * @name: Identifier * @owner: module reference @@ -869,14 +898,22 @@ struct nft_offload_ctx; * struct nft_expr_ops - nf_tables expression operations * * @eval: Expression evaluation function + * @clone: Expression clone function * @size: full expression size, including private data size * @init: initialization function * @activate: activate expression in the next generation * @deactivate: deactivate expression in next generation * @destroy: destruction function, called after synchronize_rcu + * @destroy_clone: destruction clone function * @dump: function to dump parameters - * @type: expression type * @validate: validate expression, called during loop detection + * @reduce: reduce expression + * @gc: garbage collection expression + * @offload: hardware offload expression + * @offload_action: function to report true/false to allocate one slot or not in the flow + * offload array + * @offload_stats: function to synchronize hardware stats via updating the counter expression + * @type: expression type * @data: extra data to attach to this expression operation */ struct nft_expr_ops { @@ -1029,14 +1066,21 @@ struct nft_rule_blob { /** * struct nft_chain - nf_tables chain * + * @blob_gen_0: rule blob pointer to the current generation + * @blob_gen_1: rule blob pointer to the future generation * @rules: list of rules in the chain * @list: used internally * @rhlhead: used internally * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain - * @flags: bitmask of enum nft_chain_flags + * @flags: bitmask of enum NFTA_CHAIN_FLAGS + * @bound: bind or not + * @genmask: generation mask * @name: name of the chain + * @udlen: user data length + * @udata: user data in the chain + * @blob_next: rule blob pointer to the next in the chain */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1061,7 +1105,7 @@ struct nft_chain { int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); @@ -1134,6 +1178,7 @@ struct nft_hook { * @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family) * @type: chain type * @policy: default policy + * @flags: indicate the base chain disabled or not * @stats: per-cpu chain stats * @chain: the chain * @flow_block: flow block (for hardware offload) @@ -1198,10 +1243,13 @@ static inline void nft_use_inc_restore(u32 *use) * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table + * @family:address family * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask - * @afinfo: address family info + * @nlpid: netlink port ID * @name: name of the table + * @udlen: length of the user data + * @udata: user data * @validate_state: internal, set when transaction adds jumps */ struct nft_table { @@ -1259,11 +1307,13 @@ struct nft_object_hash_key { * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @key: keys that identify this object * @rhlhead: nft_objname_ht node + * @key: keys that identify this object * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle + * @udlen: length of user data + * @udata: user data * @ops: object operations * @data: object data, layout depends on type */ @@ -1307,6 +1357,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1316,6 +1367,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; @@ -1329,6 +1381,7 @@ struct nft_object_type { * @destroy: release existing stateful object * @dump: netlink dump stateful object * @update: update stateful object + * @type: pointer to object type */ struct nft_object_ops { void (*eval)(struct nft_object *obj, @@ -1364,9 +1417,8 @@ void nft_unregister_obj(struct nft_object_type *obj_type); * @genmask: generation mask * @use: number of references to this flow table * @handle: unique object handle - * @dev_name: array of device names + * @hook_list: hook list for hooks per net_device in flowtables * @data: rhashtable and garbage collector - * @ops: array of hooks */ struct nft_flowtable { struct list_head list; @@ -1635,14 +1687,14 @@ struct nft_trans_table { struct nft_trans_elem { struct nft_set *set; - struct nft_set_elem elem; + struct nft_elem_priv *elem_priv; bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem(trans) \ - (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_priv(trans) \ + (((struct nft_trans_elem *)trans->data)->elem_priv) #define nft_trans_elem_set_bound(trans) \ (((struct nft_trans_elem *)trans->data)->bound) @@ -1682,8 +1734,8 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; - void *priv[NFT_TRANS_GC_BATCHCOUNT]; + u16 count; + struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; @@ -1700,12 +1752,13 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1732,6 +1785,7 @@ struct nftables_pernet { struct list_head notify_list; struct mutex commit_mutex; u64 table_handle; + u64 tstamp; unsigned int base_seq; unsigned int gc_seq; u8 validate_state; @@ -1744,6 +1798,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +static inline u64 nft_net_tstamp(const struct net *net) +{ + return nft_pernet(net)->tstamp; +} + #define __NFT_REDUCE_READONLY 1UL #define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 947973623dc7..60a7d0ce3080 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -30,7 +30,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; len = iph_totlen(pkt->skb, iph); - thoff = iph->ihl * 4; + thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4); if (pkt->skb->len < len) return -1; else if (len < thoff) diff --git a/include/net/netkit.h b/include/net/netkit.h new file mode 100644 index 000000000000..9ec0163739f4 --- /dev/null +++ b/include/net/netkit.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Isovalent */ +#ifndef __NET_NETKIT_H +#define __NET_NETKIT_H + +#include <linux/bpf.h> + +#ifdef CONFIG_NETKIT +int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev)); +#else +static inline int netkit_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_link_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_prog_detach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} + +static inline struct net_device *netkit_peer_dev(struct net_device *dev) +{ + return NULL; +} +#endif /* CONFIG_NETKIT */ +#endif /* __NET_NETKIT_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 8a7cd1170e1f..c19ff921b661 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -128,6 +128,8 @@ * nla_len(nla) length of attribute payload * * Attribute Payload Access for Basic Types: + * nla_get_uint(nla) get payload for a uint attribute + * nla_get_sint(nla) get payload for a sint attribute * nla_get_u8(nla) get payload for a u8 attribute * nla_get_u16(nla) get payload for a u16 attribute * nla_get_u32(nla) get payload for a u32 attribute @@ -183,6 +185,8 @@ enum { NLA_REJECT, NLA_BE16, NLA_BE32, + NLA_SINT, + NLA_UINT, __NLA_TYPE_MAX, }; @@ -229,6 +233,7 @@ enum nla_policy_validation { * nested header (or empty); len field is used if * nested_policy is also used, for the max attr * number in the nested policy. + * NLA_SINT, NLA_UINT, * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, @@ -260,12 +265,14 @@ enum nla_policy_validation { * while an array has the nested attributes at another * level down and the attribute types directly in the * nesting don't matter. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64, * NLA_BE16, * NLA_BE32, + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -280,6 +287,7 @@ enum nla_policy_validation { * or NLA_POLICY_FULL_RANGE_SIGNED() macros instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, @@ -288,6 +296,7 @@ enum nla_policy_validation { * to a struct netlink_range_validation that indicates * the min/max values. * Use NLA_POLICY_FULL_RANGE(). + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -351,8 +360,8 @@ struct nla_policy { const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; + const struct netlink_range_validation *range; + const struct netlink_range_validation_signed *range_signed; struct { s16 min, max; }; @@ -377,9 +386,11 @@ struct nla_policy { #define __NLA_IS_UINT_TYPE(tp) \ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \ - tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32) + tp == NLA_U64 || tp == NLA_UINT || \ + tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_IS_SINT_TYPE(tp) \ - (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) + (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \ + tp == NLA_SINT) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -1000,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) } /** + * nlmsg_new_large - Allocate a new netlink message with non-contiguous + * physical memory + * @payload: size of the message payload + * + * The allocated skb is unable to have frag page for shinfo->frags*, + * as the NULL setting for skb->head in netlink_skb_destructor() will + * bypass most of the handling in skb_release_data() + */ +static inline struct sk_buff *nlmsg_new_large(size_t payload) +{ + return netlink_alloc_large_skb(nlmsg_total_size(payload), 0); +} + +/** * nlmsg_end - Finalize a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header @@ -1062,21 +1087,29 @@ static inline void nlmsg_free(struct sk_buff *skb) } /** - * nlmsg_multicast - multicast a netlink message + * nlmsg_multicast_filtered - multicast a netlink message with filter function * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags + * @filter: filter function + * @filter_data: filter function private data + * + * Return: 0 on success, negative error code for failure. */ -static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 portid, unsigned int group, gfp_t flags) +static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb, + u32 portid, unsigned int group, + gfp_t flags, + netlink_filter_fn filter, + void *filter_data) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, portid, group, flags); + err = netlink_broadcast_filtered(sk, skb, portid, group, flags, + filter, filter_data); if (err > 0) err = 0; @@ -1084,6 +1117,21 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, } /** + * nlmsg_multicast - multicast a netlink message + * @sk: netlink socket to spread messages to + * @skb: netlink message as socket buffer + * @portid: own netlink portid to avoid sending to yourself + * @group: multicast group id + * @flags: allocation flags + */ +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags) +{ + return nlmsg_multicast_filtered(sk, skb, portid, group, flags, + NULL, NULL); +} + +/** * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer @@ -1189,7 +1237,7 @@ static inline void *nla_data(const struct nlattr *nla) * nla_len - length of payload * @nla: netlink attribute */ -static inline int nla_len(const struct nlattr *nla) +static inline u16 nla_len(const struct nlattr *nla) { return nla->nla_len - NLA_HDRLEN; } @@ -1358,6 +1406,22 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) } /** + * nla_put_uint - Add a variable-size unsigned int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value) +{ + u64 tmp64 = value; + u32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_u32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(u64), &tmp64); +} + +/** * nla_put_be32 - Add a __be32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type @@ -1512,6 +1576,22 @@ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, } /** + * nla_put_sint - Add a variable-size signed int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value) +{ + s64 tmp64 = value; + s32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_s32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(s64), &tmp64); +} + +/** * nla_put_string - Add a string netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type @@ -1668,6 +1748,17 @@ static inline u64 nla_get_u64(const struct nlattr *nla) } /** + * nla_get_uint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline u64 nla_get_uint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(u32)) + return nla_get_u32(nla); + return nla_get_u64(nla); +} + +/** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute */ @@ -1730,6 +1821,17 @@ static inline s64 nla_get_s64(const struct nlattr *nla) } /** + * nla_get_sint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline s64 nla_get_sint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(s32)) + return nla_get_s32(nla); + return nla_get_s64(nla); +} + +/** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 1f463b3957c7..bae914815aa3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -107,7 +107,7 @@ struct netns_ct { struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) - unsigned int labels_used; + atomic_t labels_used; #endif }; #endif diff --git a/include/net/netns/core.h b/include/net/netns/core.h index a91ef9f8de60..78214f1b43a2 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -13,6 +13,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + int sysctl_optmem_max; u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..c356c458b340 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -19,8 +19,7 @@ struct hlist_head; struct fib_table; struct sock; struct local_ports { - seqlock_t lock; - int range[2]; + u32 range; /* high << 16 | low */ bool warned; }; @@ -42,6 +41,38 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; struct netns_ipv4 { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. + * Please update the document when adding new fields. + */ + + /* TX readonly hotpath cache lines */ + __cacheline_group_begin(netns_ipv4_read_tx); + u8 sysctl_tcp_early_retrans; + u8 sysctl_tcp_tso_win_divisor; + u8 sysctl_tcp_tso_rtt_log; + u8 sysctl_tcp_autocorking; + int sysctl_tcp_min_snd_mss; + unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_wmem[3]; + u8 sysctl_ip_fwd_use_pmtu; + __cacheline_group_end(netns_ipv4_read_tx); + + /* TXRX readonly hotpath cache lines */ + __cacheline_group_begin(netns_ipv4_read_txrx); + u8 sysctl_tcp_moderate_rcvbuf; + __cacheline_group_end(netns_ipv4_read_txrx); + + /* RX readonly hotpath cache line */ + __cacheline_group_begin(netns_ipv4_read_rx); + u8 sysctl_ip_early_demux; + u8 sysctl_tcp_early_demux; + int sysctl_tcp_reordering; + int sysctl_tcp_rmem[3]; + __cacheline_group_end(netns_ipv4_read_rx); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -96,17 +127,14 @@ struct netns_ipv4 { u8 sysctl_ip_default_ttl; u8 sysctl_ip_no_pmtu_disc; - u8 sysctl_ip_fwd_use_pmtu; u8 sysctl_ip_fwd_update_priority; u8 sysctl_ip_nonlocal_bind; u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? */ u8 sysctl_ip_dynaddr; - u8 sysctl_ip_early_demux; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_raw_l3mdev_accept; #endif - u8 sysctl_tcp_early_demux; u8 sysctl_udp_early_demux; u8 sysctl_nexthop_compat_mode; @@ -119,7 +147,6 @@ struct netns_ipv4 { u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; - int sysctl_tcp_min_snd_mss; int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; @@ -132,17 +159,17 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; - int sysctl_tcp_reordering; + u8 sysctl_tcp_backlog_ack_defer; + u8 sysctl_tcp_pingpong_thresh; + u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; u8 sysctl_tcp_orphan_retries; u8 sysctl_tcp_tw_reuse; int sysctl_tcp_fin_timeout; - unsigned int sysctl_tcp_notsent_lowat; u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; - u8 sysctl_tcp_early_retrans; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; @@ -158,21 +185,13 @@ struct netns_ipv4 { u8 sysctl_tcp_frto; u8 sysctl_tcp_nometrics_save; u8 sysctl_tcp_no_ssthresh_metrics_save; - u8 sysctl_tcp_moderate_rcvbuf; - u8 sysctl_tcp_tso_win_divisor; u8 sysctl_tcp_workaround_signed_windows; - int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; - int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; - u8 sysctl_tcp_tso_rtt_log; - u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; - int sysctl_tcp_wmem[3]; - int sysctl_tcp_rmem[3]; unsigned int sysctl_tcp_child_ehash_entries; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 582212ada3ba..fc752a50f91b 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -22,5 +22,7 @@ struct netns_smc { int sysctl_smcr_testlink_time; int sysctl_wmem; int sysctl_rmem; + int sysctl_max_links_per_lgr; + int sysctl_max_conns_per_lgr; }; #endif diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd7c3be4af5d..423b52eca908 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -50,6 +50,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + unsigned int idx_generator; struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2b12725de9c0..d92046a4a078 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -92,7 +92,7 @@ struct nh_res_table { u32 unbalanced_timer; u16 num_nh_buckets; - struct nh_res_bucket nh_buckets[]; + struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; struct nh_grp_entry { @@ -126,7 +126,7 @@ struct nh_group { bool has_v4; struct nh_res_table __rcu *res_table; - struct nh_grp_entry nh_entries[]; + struct nh_grp_entry nh_entries[] __counted_by(num_nh); }; struct nexthop { @@ -187,7 +187,7 @@ struct nh_notifier_grp_entry_info { struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; - struct nh_notifier_grp_entry_info nh_entries[]; + struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; struct nh_notifier_res_bucket_info { @@ -200,7 +200,7 @@ struct nh_notifier_res_bucket_info { struct nh_notifier_res_table_info { u16 num_nh_buckets; - struct nh_notifier_single_info nhs[]; + struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; struct nh_notifier_info { diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 8cd9d141f5af..4c752f799957 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -78,6 +78,10 @@ enum nl802154_commands { NL802154_CMD_SCAN_DONE, NL802154_CMD_SEND_BEACONS, NL802154_CMD_STOP_BEACONS, + NL802154_CMD_ASSOCIATE, + NL802154_CMD_DISASSOCIATE, + NL802154_CMD_SET_MAX_ASSOCIATIONS, + NL802154_CMD_LIST_ASSOCIATIONS, /* add new commands above here */ @@ -147,6 +151,8 @@ enum nl802154_attrs { NL802154_ATTR_SCAN_DURATION, NL802154_ATTR_SCAN_DONE_REASON, NL802154_ATTR_BEACON_INTERVAL, + NL802154_ATTR_MAX_ASSOCIATIONS, + NL802154_ATTR_PEER, /* add attributes here, update the policy in nl802154.c */ @@ -385,8 +391,6 @@ enum nl802154_supported_bool_states { NL802154_SUPPORTED_BOOL_MAX = __NL802154_SUPPORTED_BOOL_AFTER_LAST - 1 }; -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL - enum nl802154_dev_addr_modes { NL802154_DEV_ADDR_NONE, __NL802154_DEV_ADDR_INVALID, @@ -406,12 +410,26 @@ enum nl802154_dev_addr_attrs { NL802154_DEV_ADDR_ATTR_SHORT, NL802154_DEV_ADDR_ATTR_EXTENDED, NL802154_DEV_ADDR_ATTR_PAD, + NL802154_DEV_ADDR_ATTR_PEER_TYPE, /* keep last */ __NL802154_DEV_ADDR_ATTR_AFTER_LAST, NL802154_DEV_ADDR_ATTR_MAX = __NL802154_DEV_ADDR_ATTR_AFTER_LAST - 1 }; +enum nl802154_peer_type { + NL802154_PEER_TYPE_UNSPEC, + + NL802154_PEER_TYPE_PARENT, + NL802154_PEER_TYPE_CHILD, + + /* keep last */ + __NL802154_PEER_TYPE_AFTER_LAST, + NL802154_PEER_TYPE_MAX = __NL802154_PEER_TYPE_AFTER_LAST - 1 +}; + +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + enum nl802154_key_id_modes { NL802154_KEY_ID_MODE_IMPLICIT, NL802154_KEY_ID_MODE_INDEX, diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 94231533a369..1d397c1a0043 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -8,23 +8,46 @@ /** * DOC: page_pool allocator * - * The page_pool allocator is optimized for the XDP mode that - * uses one frame per-page, but it can fallback on the - * regular page allocator APIs. - * - * Basic use involves replacing alloc_pages() calls with the - * page_pool_alloc_pages() call. Drivers should use - * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). - * - * API keeps track of in-flight pages, in order to let API user know - * when it is safe to free a page_pool object. Thus, API users - * must call page_pool_put_page() to free the page, or attach - * the page to a page_pool-aware objects like skbs marked with + * The page_pool allocator is optimized for recycling page or page fragment used + * by skb packet and xdp frame. + * + * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(), + * which allocate memory with or without page splitting depending on the + * requested memory size. + * + * If the driver knows that it always requires full pages or its allocations are + * always smaller than half a page, it can use one of the more specific API + * calls: + * + * 1. page_pool_alloc_pages(): allocate memory without page splitting when + * driver knows that the memory it need is always bigger than half of the page + * allocated from page pool. There is no cache line dirtying for 'struct page' + * when a page is recycled back to the page pool. + * + * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver + * knows that the memory it need is always smaller than or equal to half of the + * page allocated from page pool. Page splitting enables memory saving and thus + * avoids TLB/cache miss for data access, but there also is some cost to + * implement page splitting, mainly some cache line dirtying/bouncing for + * 'struct page' and atomic operation for page->pp_ref_count. + * + * The API keeps track of in-flight pages, in order to let API users know when + * it is safe to free a page_pool object, the API users must call + * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or + * attach the page_pool object to a page_pool-aware object like skbs marked with * skb_mark_for_recycle(). * - * API user must call page_pool_put_page() once on a page, as it - * will either recycle the page, or in case of refcnt > 1, it will - * release the DMA mapping and in-flight state accounting. + * page_pool_put_page() may be called multiple times on the same page if a page + * is split into multiple fragments. For the last fragment, it will either + * recycle the page, or in case of page->_refcount > 1, it will release the DMA + * mapping and in-flight state accounting. + * + * dma_sync_single_range_for_device() is only called for the last fragment when + * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the + * last freed fragment to do the sync_for_device operation for all fragments in + * the same page when a page is split. The API user must setup pool->p.max_len + * and pool->p.offset correctly and ensure that page_pool_put_page() is called + * with dma_sync_size being -1 for fragment API. */ #ifndef _NET_PAGE_POOL_HELPERS_H #define _NET_PAGE_POOL_HELPERS_H @@ -32,16 +55,12 @@ #include <net/page_pool/types.h> #ifdef CONFIG_PAGE_POOL_STATS +/* Deprecated driver-facing API, use netlink instead */ int page_pool_ethtool_stats_get_count(void); u8 *page_pool_ethtool_stats_get_strings(u8 *data); u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); -/* - * Drivers that wish to harvest page pool stats and report them to users - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. - */ -bool page_pool_get_stats(struct page_pool *pool, +bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats); #else static inline int page_pool_ethtool_stats_get_count(void) @@ -73,6 +92,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +/** + * page_pool_dev_alloc_frag() - allocate a page fragment. + * @pool: pool from which to allocate + * @offset: offset to the allocated page + * @size: requested size + * + * Get a page fragment from the page allocator or page_pool caches. + * + * Return: + * Return allocated page fragment, otherwise return NULL. + */ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size) @@ -82,6 +112,91 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); } +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page; + + if ((*size << 1) > max_size) { + *size = max_size; + *offset = 0; + return page_pool_alloc_pages(pool, gfp); + } + + page = page_pool_alloc_frag(pool, offset, *size, gfp); + if (unlikely(!page)) + return NULL; + + /* There is very likely not enough space for another fragment, so append + * the remaining size to the current fragment to avoid truesize + * underestimate problem. + */ + if (pool->frag_offset + *size > max_size) { + *size = max_size - *offset; + pool->frag_offset = max_size; + } + + return page; +} + +/** + * page_pool_dev_alloc() - allocate a page or a page fragment. + * @pool: pool from which to allocate + * @offset: offset to the allocated page + * @size: in as the requested size, out as the allocated size + * + * Get a page or a page fragment from the page allocator or page_pool caches + * depending on the requested size in order to allocate memory with least memory + * utilization and performance penalty. + * + * Return: + * Return allocated page or page fragment, otherwise return NULL. + */ +static inline struct page *page_pool_dev_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc(pool, offset, size, gfp); +} + +static inline void *page_pool_alloc_va(struct page_pool *pool, + unsigned int *size, gfp_t gfp) +{ + unsigned int offset; + struct page *page; + + /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */ + page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM); + if (unlikely(!page)) + return NULL; + + return page_address(page) + offset; +} + +/** + * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its + * va. + * @pool: pool from which to allocate + * @size: in as the requested size, out as the allocated size + * + * This is just a thin wrapper around the page_pool_alloc() API, and + * it returns va of the allocated page or page fragment. + * + * Return: + * Return the va for the allocated page or page fragment, otherwise return NULL. + */ +static inline void *page_pool_dev_alloc_va(struct page_pool *pool, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_va(pool, size, gfp); +} + /** * page_pool_get_dma_dir() - Retrieve the stored DMA direction. * @pool: pool from which page was allocated @@ -95,48 +210,82 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) return pool->p.dma_dir; } -/* pp_frag_count represents the number of writers who can update the page - * either by updating skb->data or via DMA mappings for the device. - * We can't rely on the page refcnt for that as we don't know who might be - * holding page references and we can't reliably destroy or sync DMA mappings - * of the fragments. +/** + * page_pool_fragment_page() - split a fresh page into fragments + * @page: page to split + * @nr: references to set + * + * pp_ref_count represents the number of outstanding references to the page, + * which will be freed using page_pool APIs (rather than page allocator APIs + * like put_page()). Such references are usually held by page_pool-aware + * objects like skbs marked for page pool recycling. * - * When pp_frag_count reaches 0 we can either recycle the page if the page - * refcnt is 1 or return it back to the memory allocator and destroy any - * mappings we have. + * This helper allows the caller to take (set) multiple references to a + * freshly allocated page. The page must be freshly allocated (have a + * pp_ref_count of 1). This is commonly done by drivers and + * "fragment allocators" to save atomic operations - either when they know + * upfront how many references they will need; or to take MAX references and + * return the unused ones with a single atomic dec(), instead of performing + * multiple atomic inc() operations. */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_frag_count, nr); + atomic_long_set(&page->pp_ref_count, nr); } -static inline long page_pool_defrag_page(struct page *page, long nr) +static inline long page_pool_unref_page(struct page *page, long nr) { long ret; - /* If nr == pp_frag_count then we have cleared all remaining - * references to the page. No need to actually overwrite it, instead - * we can leave this to be overwritten by the calling function. + /* If nr == pp_ref_count then we have cleared all remaining + * references to the page: + * 1. 'n == 1': no need to actually overwrite it. + * 2. 'n != 1': overwrite it with one, which is the rare case + * for pp_ref_count draining. * - * The main advantage to doing this is that an atomic_read is - * generally a much cheaper operation than an atomic update, - * especially when dealing with a page that may be partitioned - * into only 2 or 3 pieces. + * The main advantage to doing this is that not only we avoid a atomic + * update, as an atomic_read is generally a much cheaper operation than + * an atomic update, especially when dealing with a page that may be + * referenced by only 2 or 3 users; but also unify the pp_ref_count + * handling by ensuring all pages have partitioned into only 1 piece + * initially, and only overwrite it when the page is partitioned into + * more than one piece. */ - if (atomic_long_read(&page->pp_frag_count) == nr) + if (atomic_long_read(&page->pp_ref_count) == nr) { + /* As we have ensured nr is always one for constant case using + * the BUILD_BUG_ON(), only need to handle the non-constant case + * here for pp_ref_count draining, which is a rare case. + */ + BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); + if (!__builtin_constant_p(nr)) + atomic_long_set(&page->pp_ref_count, 1); + return 0; + } - ret = atomic_long_sub_return(nr, &page->pp_frag_count); + ret = atomic_long_sub_return(nr, &page->pp_ref_count); WARN_ON(ret < 0); + + /* We are the last user here too, reset pp_ref_count back to 1 to + * ensure all pages have been partitioned into 1 piece initially, + * this should be the rare case when the last two fragment users call + * page_pool_unref_page() currently. + */ + if (unlikely(!ret)) + atomic_long_set(&page->pp_ref_count, 1); + return ret; } -static inline bool page_pool_is_last_frag(struct page_pool *pool, - struct page *page) +static inline void page_pool_ref_page(struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || - (page_pool_defrag_page(page, 1) == 0); + atomic_long_inc(&page->pp_ref_count); +} + +static inline bool page_pool_is_last_ref(struct page *page) +{ + /* If page_pool_unref_page() returns 0, we were the last user */ + return page_pool_unref_page(page, 1) == 0; } /** @@ -161,10 +310,10 @@ static inline void page_pool_put_page(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_frag(pool, page)) + if (!page_pool_is_last_ref(page)) return; - page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); + page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); #endif } @@ -197,10 +346,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ +#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) /** + * page_pool_free_va() - free a va into the page_pool + * @pool: pool from which va was allocated + * @va: va to be freed + * @allow_direct: freed by the consumer, allow lockless caching + * + * Free a va allocated from page_pool_allo_va(). + */ +static inline void page_pool_free_va(struct page_pool *pool, void *va, + bool allow_direct) +{ + page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); +} + +/** * page_pool_get_dma_addr() - Retrieve the stored DMA address. * @page: page allocated from a page pool * @@ -211,17 +374,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; return ret; } -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { + page->dma_addr = addr >> PAGE_SHIFT; + + /* We assume page alignment to shave off bottom bits, + * if this "compression" doesn't work we need to drop. + */ + return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; + } + page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); + return false; } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 887e7946a597..76481c465375 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -5,6 +5,7 @@ #include <linux/dma-direction.h> #include <linux/ptr_ring.h> +#include <linux/types.h> #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA * map/unmap @@ -17,10 +18,8 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV |\ - PP_FLAG_PAGE_FRAG) + PP_FLAG_DMA_SYNC_DEV) /* * Fast allocation side cache array/stack @@ -45,29 +44,35 @@ struct pp_alloc_cache { /** * struct page_pool_params - page pool parameters - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV * @order: 2^order pages on allocation * @pool_size: size of the ptr_ring * @nid: NUMA node id to allocate from pages from * @dev: device, for DMA pre-mapping purposes + * @netdev: netdev this pool will serve (leave as NULL if none or multiple) * @napi: NAPI which is the sole consumer of pages, otherwise NULL * @dma_dir: DMA mapping direction * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV */ struct page_pool_params { - unsigned int flags; - unsigned int order; - unsigned int pool_size; - int nid; - struct device *dev; - struct napi_struct *napi; - enum dma_data_direction dma_dir; - unsigned int max_len; - unsigned int offset; + struct_group_tagged(page_pool_params_fast, fast, + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; + struct device *dev; + struct napi_struct *napi; + enum dma_data_direction dma_dir; + unsigned int max_len; + unsigned int offset; + ); + struct_group_tagged(page_pool_params_slow, slow, + struct net_device *netdev; /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); - void *init_arg; + void (*init_callback)(struct page *page, void *arg); + void *init_arg; + ); }; #ifdef CONFIG_PAGE_POOL_STATS @@ -121,7 +126,9 @@ struct page_pool_stats { #endif struct page_pool { - struct page_pool_params p; + struct page_pool_params_fast p; + + bool has_init_callback; long frag_users; struct page *frag_page; @@ -180,6 +187,16 @@ struct page_pool { refcount_t user_cnt; u64 destroy_cnt; + + /* Slow/Control-path information follows */ + struct page_pool_params_slow slow; + /* User-facing fields, protected by page_pools_lock */ + struct { + struct hlist_node list; + u64 detach_time; + u32 napi_id; + u32 id; + } user; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -217,9 +234,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, - bool allow_direct); +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); static inline bool is_page_pool_compiled_in(void) { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 15960564e0c3..1e200d9a066d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -20,10 +20,10 @@ struct qdisc_walker { int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); }; -static inline void *qdisc_priv(struct Qdisc *q) -{ - return &q->privdata; -} +#define qdisc_priv(q) \ + _Generic(q, \ + const struct Qdisc * : (const void *)&q->privdata, \ + struct Qdisc * : (void *)&q->privdata) static inline struct Qdisc *qdisc_from_priv(void *priv) { @@ -275,24 +275,6 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } -struct tc_skb_cb { - struct qdisc_skb_cb qdisc_cb; - - u16 mru; - u8 post_ct:1; - u8 post_ct_snat:1; - u8 post_ct_dnat:1; - u16 zone; /* Only valid if post_ct = true */ -}; - -static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) -{ - struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; - - BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); - return cb; -} - static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, unsigned long cl, struct qdisc_walker *arg) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b2cb4a9eb04d..ebf9e028d1ef 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -213,6 +213,7 @@ struct ieee80211_reg_rule { u32 flags; u32 dfs_cac_ms; bool has_wmm; + s8 psd; }; struct ieee80211_regdomain { diff --git a/include/net/route.h b/include/net/route.h index 51a45b1887b5..980ab474eabd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -37,7 +37,7 @@ #define RTO_ONLINK 0x01 -#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) +#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) static inline __u8 ip_sock_rt_scope(const struct sock *sk) @@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(inet_sk(sk)->tos); + return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); } struct ip_tunnel_info; @@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); - struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f232512505f8..934fdb977551 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -19,6 +19,7 @@ #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> +#include <linux/xarray.h> struct Qdisc_ops; struct qdisc_walker; @@ -324,7 +325,6 @@ struct Qdisc_ops { struct module *owner; }; - struct tcf_result { union { struct { @@ -332,7 +332,6 @@ struct tcf_result { u32 classid; }; const struct tcf_proto *goto_tp; - }; }; @@ -376,6 +375,10 @@ struct tcf_proto_ops { struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); + void (*tmplt_reoffload)(struct tcf_chain *chain, + bool add, + flow_setup_cb_t *cb, + void *cb_priv); struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, u32 handle); @@ -458,6 +461,7 @@ struct tcf_chain { }; struct tcf_block { + struct xarray ports; /* datapath accessible */ /* Lock protects tcf_block and lifetime-management data of chains * attached to the block (refcnt, action_refcnt, explicitly_created). */ @@ -484,6 +488,8 @@ struct tcf_block { struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; +struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index); + static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); @@ -587,6 +593,7 @@ static inline void sch_tree_unlock(struct Qdisc *q) extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; +extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1]; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; @@ -1037,6 +1044,37 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return skb; } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + u32 drop_reason; + + u16 zone; /* Only valid if post_ct = true */ + u16 mru; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + +static inline enum skb_drop_reason +tcf_get_drop_reason(const struct sk_buff *skb) +{ + return tc_skb_cb(skb)->drop_reason; +} + +static inline void tcf_set_drop_reason(const struct sk_buff *skb, + enum skb_drop_reason reason) +{ + tc_skb_cb(skb)->drop_reason = reason; +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/include/net/scm.h b/include/net/scm.h index c5bcdf65f55c..cf68acec4d70 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -5,10 +5,12 @@ #include <linux/limits.h> #include <linux/net.h> #include <linux/cred.h> +#include <linux/file.h> #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/sched/signal.h> +#include <net/compat.h> /* Well, we should have at least one descriptor open * to accept passed FDs 8) @@ -123,14 +125,17 @@ static inline bool scm_has_secdata(struct socket *sock) static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) { struct file *pidfd_file = NULL; - int pidfd; + int len, pidfd; - /* - * put_cmsg() doesn't return an error if CMSG is truncated, + /* put_cmsg() doesn't return an error if CMSG is truncated, * that's why we need to opencode these checks here. */ - if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || - (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { + if (msg->msg_flags & MSG_CMSG_COMPAT) + len = sizeof(struct compat_cmsghdr) + sizeof(int); + else + len = sizeof(struct cmsghdr) + sizeof(int); + + if (msg->msg_controllen < len) { msg->msg_flags |= MSG_CTRUNC; return; } @@ -204,5 +209,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg, scm_destroy_cred(scm); } +static inline int scm_recv_one_fd(struct file *f, int __user *ufd, + unsigned int flags) +{ + if (!ufd) + return -EFAULT; + return receive_fd(f, ufd, flags); +} + #endif /* __LINUX_NET_SCM_H */ diff --git a/include/net/smc.h b/include/net/smc.h index a002552be29c..c9dcb30e3fd9 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -52,9 +52,14 @@ struct smcd_dmb { struct smcd_dev; struct ism_client; +struct smcd_gid { + u64 gid; + u64 gid_ext; +}; + struct smcd_ops { - int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid, - u32 vid); + int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid, + u32 vid_valid, u32 vid); int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, struct ism_client *client); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); @@ -62,14 +67,13 @@ struct smcd_ops { int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); int (*set_vlan_required)(struct smcd_dev *dev); int (*reset_vlan_required)(struct smcd_dev *dev); - int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info); + int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, + u32 trigger_irq, u32 event_code, u64 info); int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); int (*supports_v2)(void); - u8* (*get_system_eid)(void); - u64 (*get_local_gid)(struct smcd_dev *dev); + void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); u16 (*get_chid)(struct smcd_dev *dev); struct device* (*get_dev)(struct smcd_dev *dev); }; diff --git a/include/net/sock.h b/include/net/sock.h index 11d503417591..54ca8dcbfb43 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -76,19 +76,6 @@ * the other protocols. */ -/* Define this to get the SOCK_DBG debugging facility. */ -#define SOCK_DEBUGGING -#ifdef SOCK_DEBUGGING -#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \ - printk(KERN_DEBUG msg); } while (0) -#else -/* Validate arguments and do nothing */ -static inline __printf(2, 3) -void SOCK_DEBUG(const struct sock *sk, const char *msg, ...) -{ -} -#endif - /* This is the per-socket lock. The spinlock provides a synchronization * between user contexts and software interrupt processing, whereas the * mini-semaphore synchronizes multiple users amongst themselves. @@ -277,8 +264,6 @@ struct sk_filter; * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes - * @__sk_flags_offset: empty field used to determine location of bitfield - * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -336,7 +321,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -352,7 +337,6 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference - * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -429,7 +413,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -544,7 +528,6 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; - struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -873,16 +856,6 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -static inline void __sk_del_bind2_node(struct sock *sk) -{ - __hlist_del(&sk->sk_bind2_node); -} - -static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) -{ - hlist_add_head(&sk->sk_bind2_node, list); -} - #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -900,8 +873,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) -#define sk_for_each_bound_bhash2(__sk, list) \ - hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset @@ -1053,6 +1024,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val) WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } +static inline void sk_forward_alloc_add(struct sock *sk, int val) +{ + /* Paired with lockless reads of sk->sk_forward_alloc */ + WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val); +} + void sk_stream_write_space(struct sock *sk); /* OOB backlog add */ @@ -1183,8 +1160,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1194,8 +1170,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \ __rc; \ }) @@ -1377,7 +1352,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk) if (sk->sk_prot->forward_alloc_get) return sk->sk_prot->forward_alloc_get(sk); #endif - return sk->sk_forward_alloc; + return READ_ONCE(sk->sk_forward_alloc); } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) @@ -1673,14 +1648,14 @@ static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; - sk->sk_forward_alloc -= size; + sk_forward_alloc_add(sk, -size); } static inline void sk_mem_uncharge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; - sk->sk_forward_alloc += size; + sk_forward_alloc_add(sk, size); sk_mem_reclaim(sk); } @@ -1817,12 +1792,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) static inline void sock_release_ownership(struct sock *sk) { - if (sock_owned_by_user_nocheck(sk)) { - sk->sk_lock.owned = 0; + DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk)); + sk->sk_lock.owned = 0; - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } /* no reclassification while locks are held */ @@ -1861,11 +1835,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen); +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen); int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); -int sock_getsockopt(struct socket *sock, int level, int op, - char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, @@ -1900,7 +1876,9 @@ struct sockcm_cookie { static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { - *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags }; + *sockc = (struct sockcm_cookie) { + .tsflags = READ_ONCE(sk->sk_tsflags) + }; } int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, @@ -2000,21 +1978,33 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* sk_tx_queue_mapping accept only upto a 16-bit value */ if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX)) return; - sk->sk_tx_queue_mapping = tx_queue; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); } #define NO_QUEUE_MAPPING USHRT_MAX static inline void sk_tx_queue_clear(struct sock *sk) { - sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } static inline int sk_tx_queue_get(const struct sock *sk) { - if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_tx_queue_mapping; + if (sk) { + /* Paired with WRITE_ONCE() in sk_tx_queue_clear() + * and sk_tx_queue_set(). + */ + int val = READ_ONCE(sk->sk_tx_queue_mapping); + if (val != NO_QUEUE_MAPPING) + return val; + } return -1; } @@ -2134,14 +2124,14 @@ static inline bool sk_rethink_txhash(struct sock *sk) } static inline struct dst_entry * -__sk_dst_get(struct sock *sk) +__sk_dst_get(const struct sock *sk) { return rcu_dereference_check(sk->sk_dst_cache, lockdep_sock_is_held(sk)); } static inline struct dst_entry * -sk_dst_get(struct sock *sk) +sk_dst_get(const struct sock *sk) { struct dst_entry *dst; @@ -2163,7 +2153,7 @@ static inline void __dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } @@ -2180,7 +2170,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); @@ -2193,7 +2183,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -2231,7 +2221,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) } } -bool sk_mc_loop(struct sock *sk); +bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { @@ -2695,9 +2685,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - ktime_t kt = skb->tstamp; struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); - + u32 tsflags = READ_ONCE(sk->sk_tsflags); + ktime_t kt = skb->tstamp; /* * generate control messages if * - receive time stamping in software requested @@ -2705,10 +2695,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * - hardware time stamps available and wanted */ if (sock_flag(sk, SOCK_RCVTSTAMP) || - (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) || - (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) || + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) || + (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) || (hwtstamps->hwtstamp && - (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) + (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sock_write_timestamp(sk, kt); @@ -2730,7 +2720,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + if (sk->sk_flags & FLAGS_RECV_CMSGS || + READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY) __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); @@ -2774,9 +2765,30 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_inet(const struct sock *sk) +{ + int family = READ_ONCE(sk->sk_family); + + return family == AF_INET || family == AF_INET6; +} + static inline bool sk_is_tcp(const struct sock *sk) { - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; + return sk_is_inet(sk) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk_is_inet(sk) && + sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + +static inline bool sk_is_stream_unix(const struct sock *sk) +{ + return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; } /** @@ -2900,7 +2912,6 @@ extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; extern int sysctl_tstamp_allow_data; -extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index b24ea2d9400b..77f87c622a2e 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -22,6 +22,7 @@ struct tcf_ct_params { struct nf_nat_range2 range; bool ipv4_range; + bool put_labels; u16 ct_action; @@ -57,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) return to_ct_params(a)->nf_ft; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return to_ct_params(a)->helper; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } @@ -64,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) { return NULL; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h deleted file mode 100644 index 4225fcb1c6ba..000000000000 --- a/include/net/tc_act/tc_ipt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NET_TC_IPT_H -#define __NET_TC_IPT_H - -#include <net/act_api.h> - -struct xt_entry_target; - -struct tcf_ipt { - struct tc_action common; - u32 tcfi_hook; - char *tcfi_tname; - struct xt_entry_target *tcfi_t; -}; -#define to_ipt(a) ((struct tcf_ipt *)a) - -#endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 32ce8ea36950..75722d967bf2 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,6 +8,7 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; + u32 tcfm_blockid; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; netdevice_tracker tcfm_dev_tracker; diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index a6d481b5bcbc..a608546bcefc 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -117,10 +117,6 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, if (a->ops->act == tcf_ife_act) return tcf_ife_act(skb, a, res); #endif -#if IS_BUILTIN(CONFIG_NET_ACT_IPT) - if (a->ops->act == tcf_ipt_act) - return tcf_ipt_act(skb, a, res); -#endif #if IS_BUILTIN(CONFIG_NET_ACT_SIMP) if (a->ops->act == tcf_simp_act) return tcf_simp_act(skb, a, res); diff --git a/include/net/tcp.h b/include/net/tcp.h index 91688d0dadcd..dd78a1181031 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -37,6 +37,7 @@ #include <net/snmp.h> #include <net/ip.h> #include <net/tcp_states.h> +#include <net/tcp_ao.h> #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> @@ -131,6 +132,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); + #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) @@ -141,6 +144,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ + +#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ + #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -161,7 +167,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 -#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) +/* Ensure that TCP PAWS checks are relaxed after ~2147 seconds + * to avoid overflows. This assumes a clock smaller than 1 Mhz. + * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz. + */ +#define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC) + #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal * (or greater than) TCP_TIMEWAIT_LEN @@ -184,6 +195,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ @@ -348,12 +360,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule); -static inline void tcp_dec_quickack_mode(struct sock *sk, - const unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { + /* How many ACKs S/ACKing new data have we sent? */ + const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; + if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ @@ -424,7 +438,6 @@ int tcp_mmap(struct file *file, struct socket *sock, void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * BPF SKB-less helpers @@ -477,13 +490,14 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff); -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie); + struct dst_entry *dst); +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, - struct sock *sk, struct sk_buff *skb); + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff); + #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. @@ -569,12 +583,15 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); u64 cookie_init_timestamp(struct request_sock *req, u64 now); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); -bool cookie_ecn_ok(const struct tcp_options_received *opt, - const struct net *net, const struct dst_entry *dst); + +static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst) +{ + return READ_ONCE(net->ipv4.sysctl_tcp_ecn) || + dst_feature(dst, RTAX_FEATURE_ECN); +} /* From net/ipv6/syncookies.c */ -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - u32 cookie); +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, @@ -718,8 +735,10 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +u32 tcp_delack_max(const struct sock *sk); + /* Compute the actual rto_min value */ -static inline u32 tcp_rto_min(struct sock *sk) +static inline u32 tcp_rto_min(const struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); u32 rto_min = inet_csk(sk)->icsk_rto_min; @@ -729,7 +748,7 @@ static inline u32 tcp_rto_min(struct sock *sk) return rto_min; } -static inline u32 tcp_rto_min_us(struct sock *sk) +static inline u32 tcp_rto_min_us(const struct sock *sk) { return jiffies_to_usecs(tcp_rto_min(sk)); } @@ -789,22 +808,31 @@ static inline u64 tcp_clock_us(void) return div_u64(tcp_clock_ns(), NSEC_PER_USEC); } -/* This should only be used in contexts where tp->tcp_mstamp is up to date */ -static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +static inline u64 tcp_clock_ms(void) { - return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); } -/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ -static inline u32 tcp_ns_to_ts(u64 ns) +/* TCP Timestamp included in TS option (RFC 1323) can either use ms + * or usec resolution. Each socket carries a flag to select one or other + * resolution, as the route attribute could change anytime. + * Each flow must stick to initial resolution. + */ +static inline u32 tcp_clock_ts(bool usec_ts) { - return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); + return usec_ts ? tcp_clock_us() : tcp_clock_ms(); } -/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ -static inline u32 tcp_time_stamp_raw(void) +static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) { - return tcp_ns_to_ts(tcp_clock_ns()); + return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); +} + +static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) +{ + if (tp->tcp_usec_ts) + return tp->tcp_mstamp; + return tcp_time_stamp_ms(tp); } void tcp_mstamp_refresh(struct tcp_sock *tp); @@ -814,17 +842,30 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } -static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) -{ - return tcp_ns_to_ts(skb->skb_mstamp_ns); -} - /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } +/* Provide skb TSval in usec or ms unit */ +static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) +{ + if (usec_ts) + return tcp_skb_timestamp_us(skb); + + return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC); +} + +static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) +{ + return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset; +} + +static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) +{ + return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off; +} #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) @@ -1453,13 +1494,15 @@ static inline int tcp_space_from_win(const struct sock *sk, int win) return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win); } +/* Assume a conservative default of 1200 bytes of payload per 4K page. + * This may be adjusted later in tcp_measure_rcv_mss(). + */ +#define TCP_DEFAULT_SCALING_RATIO ((1200 << TCP_RMEM_TO_WIN_SCALE) / \ + SKB_TRUESIZE(4096)) + static inline void tcp_scaling_ratio_init(struct sock *sk) { - /* Assume a conservative default of 1200 bytes of payload per 4K page. - * This may be adjusted later in tcp_measure_rcv_mss(). - */ - tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) / - SKB_TRUESIZE(4096); + tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; } /* Note: caller must be prepared to deal with negative returns */ @@ -1475,17 +1518,22 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); @@ -1590,7 +1638,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) return true; if (unlikely(!time_before32(ktime_get_seconds(), - rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))) + rx_opt->ts_recent_stamp + TCP_PAWS_WRAP))) return true; /* * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, @@ -1650,12 +1698,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->retransmit_skb_hint = NULL; } -union tcp_md5_addr { - struct in_addr a4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr a6; -#endif -}; +#define tcp_md5_addr tcp_ao_addr /* - key database */ struct tcp_md5sig_key { @@ -1699,12 +1742,39 @@ union tcp_md5sum_block { #endif }; -/* - pool: digest algorithm, hash description and scratch buffer */ -struct tcp_md5sig_pool { - struct ahash_request *md5_req; - void *scratch; +/* + * struct tcp_sigpool - per-CPU pool of ahash_requests + * @scratch: per-CPU temporary area, that can be used between + * tcp_sigpool_start() and tcp_sigpool_end() to perform + * crypto request + * @req: pre-allocated ahash request + */ +struct tcp_sigpool { + void *scratch; + struct ahash_request *req; }; +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size); +void tcp_sigpool_get(unsigned int id); +void tcp_sigpool_release(unsigned int id); +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len); + +/** + * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @c: returned tcp_sigpool for usage (uninitialized on failure) + * + * Returns 0 on success, error otherwise. + */ +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); +/** + * tcp_sigpool_end - enable bh and stop using tcp_sigpool + * @c: tcp_sigpool context that was returned by tcp_sigpool_start() + */ +void tcp_sigpool_end(struct tcp_sigpool *c); +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); @@ -1717,28 +1787,36 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); +void tcp_clear_md5_list(struct sock *sk); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -#include <linux/jump_label.h> -extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family); + int family, bool any_l3index); static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; - return __tcp_md5_do_lookup(sk, l3index, addr, family); + return __tcp_md5_do_lookup(sk, l3index, addr, family, false); +} + +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return NULL; + return __tcp_md5_do_lookup(sk, 0, addr, family, true); } enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif); + int family, int l3index, const __u8 *hash_location); #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) @@ -1750,27 +1828,29 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; } +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + return NULL; +} + static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { return SKB_NOT_DROPPED_YET; } #define tcp_twsk_md5_key(twsk) NULL #endif -bool tcp_alloc_md5sig_pool(void); +int tcp_md5_alloc_sigpool(void); +void tcp_md5_release_sigpool(void); +void tcp_md5_add_sigpool(void); +extern int tcp_md5_sigpool_id; -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -static inline void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); -} - -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, - unsigned int header_len); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, +int tcp_md5_hash_key(struct tcp_sigpool *hp, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ @@ -2075,7 +2155,11 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)) INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); +#ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); +#else +static inline void tcp_gro_complete(struct sk_buff *skb) { } +#endif void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); @@ -2115,6 +2199,18 @@ struct tcp_sock_af_ops { sockptr_t optval, int optlen); #endif +#ifdef CONFIG_TCP_AO + int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid); + int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); + int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +#endif }; struct tcp_request_sock_ops { @@ -2127,6 +2223,15 @@ struct tcp_request_sock_ops { const struct sock *sk, const struct sk_buff *skb); #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); + int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk); + int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +#endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); @@ -2167,6 +2272,76 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, } #endif +struct tcp_key { + union { + struct { + struct tcp_ao_key *ao_key; + char *traffic_key; + u32 sne; + u8 rcv_next; + }; + struct tcp_md5sig_key *md5_key; + }; + enum { + TCP_KEY_NONE = 0, + TCP_KEY_MD5, + TCP_KEY_AO, + } type; +}; + +static inline void tcp_get_current_key(const struct sock *sk, + struct tcp_key *out) +{ +#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) + const struct tcp_sock *tp = tcp_sk(sk); +#endif + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key)) { + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + out->ao_key = READ_ONCE(ao->current_key); + out->type = TCP_KEY_AO; + return; + } + } +#endif +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + rcu_access_pointer(tp->md5sig_info)) { + out->md5_key = tp->af_specific->md5_lookup(sk, sk); + if (out->md5_key) { + out->type = TCP_KEY_MD5; + return; + } + } +#endif + out->type = TCP_KEY_NONE; +} + +static inline bool tcp_key_is_md5(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + key->type == TCP_KEY_MD5) + return true; +#endif + return false; +} + +static inline bool tcp_key_is_ao(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + key->type == TCP_KEY_AO) + return true; +#endif + return false; +} + int tcpv4_offload_init(void); void tcp_v4_init(void); @@ -2525,4 +2700,116 @@ static inline u64 tcp_transmit_time(const struct sock *sk) return 0; } +static inline int tcp_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const struct tcp_ao_hdr **aoh) +{ + const u8 *md5_tmp, *ao_tmp; + int ret; + + ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp); + if (ret) + return ret; + + if (md5_hash) + *md5_hash = md5_tmp; + + if (aoh) { + if (!ao_tmp) + *aoh = NULL; + else + *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2); + } + + return 0; +} + +static inline bool tcp_ao_required(struct sock *sk, const void *saddr, + int family, int l3index, bool stat_inc) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + struct tcp_ao_key *ao_key; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + + ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return false; + + ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1); + if (ao_info->ao_required || ao_key) { + if (stat_inc) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); + atomic64_inc(&ao_info->counters.ao_required); + } + return true; + } +#endif + return false; +} + +/* Called with rcu_read_lock() */ +static inline enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. + */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} + #endif /* _TCP_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h new file mode 100644 index 000000000000..471e177362b4 --- /dev/null +++ b/include/net/tcp_ao.h @@ -0,0 +1,387 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_AO_H +#define _TCP_AO_H + +#define TCP_AO_KEY_ALIGN 1 +#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN) + +union tcp_ao_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + +struct tcp_ao_hdr { + u8 kind; + u8 length; + u8 keyid; + u8 rnext_keyid; +}; + +struct tcp_ao_counters { + atomic64_t pkt_good; + atomic64_t pkt_bad; + atomic64_t key_not_found; + atomic64_t ao_required; + atomic64_t dropped_icmp; +}; + +struct tcp_ao_key { + struct hlist_node node; + union tcp_ao_addr addr; + u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align; + unsigned int tcp_sigpool_id; + unsigned int digest_size; + int l3index; + u8 prefixlen; + u8 family; + u8 keylen; + u8 keyflags; + u8 sndid; + u8 rcvid; + u8 maclen; + struct rcu_head rcu; + atomic64_t pkt_good; + atomic64_t pkt_bad; + u8 traffic_keys[]; +}; + +static inline u8 *rcv_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys; +} + +static inline u8 *snd_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys + key->digest_size; +} + +static inline int tcp_ao_maclen(const struct tcp_ao_key *key) +{ + return key->maclen; +} + +/* Use tcp_ao_len_aligned() for TCP header calculations */ +static inline int tcp_ao_len(const struct tcp_ao_key *key) +{ + return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); +} + +static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key) +{ + return round_up(tcp_ao_len(key), 4); +} + +static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) +{ + return key->digest_size; +} + +static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key) +{ + return sizeof(struct tcp_ao_key) + (key->digest_size << 1); +} + +struct tcp_ao_info { + /* List of tcp_ao_key's */ + struct hlist_head head; + /* current_key and rnext_key aren't maintained on listen sockets. + * Their purpose is to cache keys on established connections, + * saving needless lookups. Never dereference any of them from + * listen sockets. + * ::current_key may change in RX to the key that was requested by + * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid + * load/store tearing. + * Do the same for ::rnext_key, if you don't hold socket lock + * (it's changed only by userspace request in setsockopt()). + */ + struct tcp_ao_key *current_key; + struct tcp_ao_key *rnext_key; + struct tcp_ao_counters counters; + u32 ao_required :1, + accept_icmps :1, + __unused :30; + __be32 lisn; + __be32 risn; + /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ, + * that protect TCP-AO connection from replayed old TCP segments. + * See RFC5925 (6.2). + * In order to get correct SNE, there's a helper tcp_ao_compute_sne(). + * It needs SEQ basis to understand whereabouts are lower SEQ numbers. + * According to that basis vector, it can provide incremented SNE + * when SEQ rolls over or provide decremented SNE when there's + * a retransmitted segment from before-rolling over. + * - for request sockets such basis is rcv_isn/snt_isn, which seems + * good enough as it's unexpected to receive 4 Gbytes on reqsk. + * - for full sockets the basis is rcv_nxt/snd_una. snd_una is + * taken instead of snd_nxt as currently it's easier to track + * in tcp_snd_una_update(), rather than updating SNE in all + * WRITE_ONCE(tp->snd_nxt, ...) + * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt. + * tw_snd_nxt is not expected to change, while tw_rcv_nxt may. + */ + u32 snd_sne; + u32 rcv_sne; + refcount_t refcnt; /* Protects twsk destruction */ + struct rcu_head rcu; +}; + +#ifdef CONFIG_TCP_MD5SIG +#include <linux/jump_label.h> +extern struct static_key_false_deferred tcp_md5_needed; +#define static_branch_tcp_md5() static_branch_unlikely(&tcp_md5_needed.key) +#else +#define static_branch_tcp_md5() false +#endif +#ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ +#include <linux/jump_label.h> +extern struct static_key_false_deferred tcp_ao_needed; +#define static_branch_tcp_ao() static_branch_unlikely(&tcp_ao_needed.key) +#else +#define static_branch_tcp_ao() false +#endif + +static inline bool tcp_hash_should_produce_warnings(void) +{ + return static_branch_tcp_md5() || static_branch_tcp_ao(); +} + +#define tcp_hash_fail(msg, family, skb, fmt, ...) \ +do { \ + const struct tcphdr *th = tcp_hdr(skb); \ + char hdr_flags[6]; \ + char *f = hdr_flags; \ + \ + if (!tcp_hash_should_produce_warnings()) \ + break; \ + if (th->fin) \ + *f++ = 'F'; \ + if (th->syn) \ + *f++ = 'S'; \ + if (th->rst) \ + *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ + if (th->ack) \ + *f++ = '.'; \ + *f = 0; \ + if ((family) == AF_INET) { \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ + msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ + &ip_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } else { \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ + msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ + &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } \ +} while (0) + +#ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ +struct tcp4_ao_context { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp6_ao_context { + struct in6_addr saddr; + struct in6_addr daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp_sigpool; +#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \ + TCPF_CLOSE | TCPF_CLOSE_WAIT | \ + TCPF_LAST_ACK | TCPF_CLOSING) + +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location); +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid); +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family); +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp); +void tcp_ao_destroy_sock(struct sock *sk, bool twsk); +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen); +enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh); +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid); +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne); +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne); + +/* ipv4 specific functions */ +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid); +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +/* ipv6 specific functions */ +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes); +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, __be32 sisn, __be32 disn); +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send); +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, int sndid, int rcvid); +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +void tcp_ao_established(struct sock *sk); +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); +void tcp_ao_connect_init(struct sock *sk); +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct request_sock *req, unsigned short int family); +#else /* CONFIG_TCP_AO */ + +static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + return 0; +} + +static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct request_sock *req, unsigned short int family) +{ +} + +static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family, + int type, int code) +{ + return false; +} + +static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh) +{ + return SKB_NOT_DROPPED_YET; +} + +static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + int l3index, const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return NULL; +} + +static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ +} + +static inline void tcp_ao_established(struct sock *sk) +{ +} + +static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, + struct tcp_sock *tp) +{ +} + +static inline void tcp_ao_connect_init(struct sock *sk) +{ +} + +static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_repair(struct sock *sk, + sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_set_repair(struct sock *sk, + sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} +#endif + +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash); +#else +static inline int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + *md5_hash = NULL; + *ao_hash = NULL; + return 0; +} +#endif + +#endif /* _TCP_AO_H */ diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index cc00118acca1..d60e8148ff4c 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -22,6 +22,7 @@ enum { TCP_LISTEN, TCP_CLOSING, /* Now a valid state */ TCP_NEW_SYN_RECV, + TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */ TCP_MAX_STATES /* Leave at the end! */ }; @@ -43,6 +44,7 @@ enum { TCPF_LISTEN = (1 << TCP_LISTEN), TCPF_CLOSING = (1 << TCP_CLOSING), TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV), + TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE), }; #endif /* _LINUX_TCP_STATES_H */ diff --git a/include/net/tcx.h b/include/net/tcx.h index 264f147953ba..04be9377785d 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -38,16 +38,11 @@ static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry) return container_of(bundle, struct tcx_entry, bundle); } -static inline struct tcx_link *tcx_link(struct bpf_link *link) +static inline struct tcx_link *tcx_link(const struct bpf_link *link) { return container_of(link, struct tcx_link, link); } -static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link) -{ - return tcx_link((struct bpf_link *)link); -} - void tcx_inc(void); void tcx_dec(void); diff --git a/include/net/tls.h b/include/net/tls.h index a2b44578dcb7..962f0c501111 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -61,7 +61,8 @@ struct tls_rec; #define TLS_AAD_SPACE_SIZE 13 -#define MAX_IV_SIZE 16 +#define TLS_MAX_IV_SIZE 16 +#define TLS_MAX_SALT_SIZE 4 #define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 #define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE @@ -149,6 +150,7 @@ struct tls_record_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; +#define TLS_DRIVER_STATE_SIZE_TX 16 struct tls_offload_context_tx { struct crypto_aead *aead_send; spinlock_t lock; /* protects records list */ @@ -162,17 +164,13 @@ struct tls_offload_context_tx { void (*sk_destruct)(struct sock *sk); struct work_struct destruct_work; struct tls_context *ctx; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_TX 16 + u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) - enum tls_context_flags { /* tls_device_down was called after the netdev went down, device state * was released, and kTLS works in software, even though rx_conf is @@ -193,8 +191,8 @@ enum tls_context_flags { }; struct cipher_context { - char *iv; - char *rec_seq; + char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; + char rec_seq[TLS_MAX_REC_SEQ_SIZE]; }; union tls_crypto_context { @@ -302,6 +300,7 @@ struct tls_offload_resync_async { u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; +#define TLS_DRIVER_STATE_SIZE_RX 8 struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -325,17 +324,13 @@ struct tls_offload_context_rx { struct tls_offload_resync_async *resync_async; }; }; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_RX 8 + u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..d716214fe03d 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -154,13 +154,30 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache); +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, int oif, + struct in6_addr *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache); + struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); @@ -174,16 +191,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 6a9f8a5f387c..33ba6fc151cf 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -210,22 +210,23 @@ struct vxlan_rdst { }; struct vxlan_config { - union vxlan_addr remote_ip; - union vxlan_addr saddr; - __be32 vni; - int remote_ifindex; - int mtu; - __be16 dst_port; - u16 port_min; - u16 port_max; - u8 tos; - u8 ttl; - __be32 label; - u32 flags; - unsigned long age_interval; - unsigned int addrmax; - bool no_share; - enum ifla_vxlan_df df; + union vxlan_addr remote_ip; + union vxlan_addr saddr; + __be32 vni; + int remote_ifindex; + int mtu; + __be16 dst_port; + u16 port_min; + u16 port_max; + u8 tos; + u8 ttl; + __be32 label; + enum ifla_vxlan_label_policy label_policy; + u32 flags; + unsigned long age_interval; + unsigned int addrmax; + bool no_share; + enum ifla_vxlan_df df; }; enum { diff --git a/include/net/xdp.h b/include/net/xdp.h index de08c8e0d134..e6770dd40c91 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -16,7 +16,7 @@ * * The XDP RX-queue info (xdp_rxq_info) is associated with the driver * level RX-ring queues. It is information that is specific to how - * the driver have configured a given RX-ring queue. + * the driver has configured a given RX-ring queue. * * Each xdp_buff frame received in the driver carries a (pointer) * reference to this xdp_rxq_info structure. This provides the XDP @@ -32,7 +32,7 @@ * The struct is not directly tied to the XDP prog. A new XDP prog * can be attached as long as it doesn't change the underlying * RX-ring. If the RX-ring does change significantly, the NIC driver - * naturally need to stop the RX-ring before purging and reallocating + * naturally needs to stop the RX-ring before purging and reallocating * memory. In that process the driver MUST call unregister (which * also applies for driver shutdown and unload). The register API is * also mandatory during RX-ring setup. @@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) static inline bool xdp_metalen_invalid(unsigned long metalen) { - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); + unsigned long meta_max; + + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); + + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; } struct xdp_attachment_info { @@ -383,14 +388,29 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE +/* Define the relationship between xdp-rx-metadata kfunc and + * various other entities: + * - xdp_rx_metadata enum + * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml) + * - kfunc name + * - xdp_metadata_ops field + */ #define XDP_METADATA_KFUNC_xxx \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ - bpf_xdp_metadata_rx_timestamp) \ + NETDEV_XDP_RX_METADATA_TIMESTAMP, \ + bpf_xdp_metadata_rx_timestamp, \ + xmo_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ - bpf_xdp_metadata_rx_hash) \ - -enum { -#define XDP_METADATA_KFUNC(name, _) name, + NETDEV_XDP_RX_METADATA_HASH, \ + bpf_xdp_metadata_rx_hash, \ + xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ + +enum xdp_rx_metadata { +#define XDP_METADATA_KFUNC(name, _, __, ___) name, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC MAX_XDP_METADATA_KFUNC, @@ -416,6 +436,7 @@ enum xdp_rss_hash_type { XDP_RSS_L4_UDP = BIT(5), XDP_RSS_L4_SCTP = BIT(6), XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */ + XDP_RSS_L4_ICMP = BIT(8), /* Second part: RSS hash type combinations used for driver HW mapping */ XDP_RSS_TYPE_NONE = 0, @@ -431,11 +452,13 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, @@ -446,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1617af380162..3cb4dc9bd70e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,6 +14,8 @@ #include <linux/mm.h> #include <net/sock.h> +#define XDP_UMEM_SG_FLAG (1 << 1) + struct net_device; struct xsk_queue; struct xdp_buff; @@ -28,6 +30,7 @@ struct xdp_umem { struct user_struct *user; refcount_t users; u8 flags; + u8 tx_metadata_len; bool zc; struct page **pgs; int id; @@ -61,6 +64,13 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; + /* record the number of tx descriptors sent by this xsk and + * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs + * to be given to other xsks for sending tx descriptors, thereby + * preventing other XSKs from being starved. + */ + u32 tx_budget_spent; + /* Protects generic receive. */ spinlock_t rx_lock; @@ -83,12 +93,105 @@ struct xdp_sock { struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */ }; +/* + * AF_XDP TX metadata hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * void (*tmo_request_timestamp)(void *priv) + * Called when AF_XDP frame requested egress timestamp. + * + * u64 (*tmo_fill_timestamp)(void *priv) + * Called when AF_XDP frame, that had requested egress timestamp, + * received a completion. The hook needs to return the actual HW timestamp. + * + * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv) + * Called when AF_XDP frame requested HW checksum offload. csum_start + * indicates position where checksumming should start. + * csum_offset indicates position where checksum should be stored. + * + */ +struct xsk_tx_metadata_ops { + void (*tmo_request_timestamp)(void *priv); + u64 (*tmo_fill_timestamp)(void *priv); + void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv); +}; + #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); +/** + * xsk_tx_metadata_to_compl - Save enough relevant metadata information + * to perform tx completion in the future. + * @meta: pointer to AF_XDP metadata area + * @compl: pointer to output struct xsk_tx_metadata_to_compl + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. The value of @compl should be stored + * and passed to xsk_tx_metadata_complete upon TX completion. + */ +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ + if (!meta) + return; + + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + compl->tx_timestamp = &meta->completion.tx_timestamp; + else + compl->tx_timestamp = NULL; +} + +/** + * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission + * and call appropriate xsk_tx_metadata_ops operation. + * @meta: pointer to AF_XDP metadata area + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. + */ +static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!meta) + return; + + if (ops->tmo_request_timestamp) + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + ops->tmo_request_timestamp(priv); + + if (ops->tmo_request_checksum) + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) + ops->tmo_request_checksum(meta->request.csum_start, + meta->request.csum_offset, priv); +} + +/** + * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion + * and call appropriate xsk_tx_metadata_ops operation. + * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device upon + * AF_XDP egress completion. + */ +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!compl) + return; + + *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); +} + #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -105,6 +208,32 @@ static inline void __xsk_map_flush(void) { } +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ +} + +static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ +#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) +bool xsk_map_check_flush(void); +#else +static inline bool xsk_map_check_flush(void) +{ + return false; +} +#endif + #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 1f6fc8c7a84c..c9aec9ab6191 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -12,6 +12,12 @@ #define XDP_UMEM_MIN_CHUNK_SHIFT 11 #define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) +struct xsk_cb_desc { + void *src; + u8 off; + u8 bytes; +}; + #ifdef CONFIG_XDP_SOCKETS void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); @@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, xp_set_rxq_info(pool, rxq); } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ + xp_fill_cb(pool, desc); +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { #ifdef CONFIG_NET_RX_BUSY_POLL @@ -147,11 +159,29 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return ret; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ + struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); + + list_del(&xskb->xskb_list_node); +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); + struct xdp_buff_xsk *frag; + + frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, + xskb_list_node); + return &frag->xdp; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; xdp->data_meta = xdp->data; xdp->data_end = xdp->data + size; + xdp->flags = 0; } static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, @@ -165,6 +195,30 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +#define XDP_TXMD_FLAGS_VALID ( \ + XDP_TXMD_FLAGS_TIMESTAMP | \ + XDP_TXMD_FLAGS_CHECKSUM | \ + 0) + +static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) +{ + return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); +} + +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + struct xsk_tx_metadata *meta; + + if (!pool->tx_metadata_len) + return NULL; + + meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len; + if (unlikely(!xsk_buff_valid_tx_metadata(meta))) + return NULL; /* no way to signal the error to the user */ + + return meta; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -250,6 +304,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, { } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { return 0; @@ -309,6 +368,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return NULL; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + return NULL; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } @@ -324,6 +392,16 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) +{ + return false; +} + +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return NULL; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 363c7d510554..1d107241b901 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1); } -int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse); -static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 0); + return __xfrm_decode_session(net, skb, fl, family, 0); } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 1); + return __xfrm_decode_session(net, skb, fl, family, 1); } int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); @@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { return 1; } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { @@ -1669,7 +1669,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); @@ -1689,7 +1688,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1712,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen); #else @@ -2166,7 +2168,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } @@ -2188,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void) #endif +#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF) +int register_xfrm_state_bpf(void); +#else +static inline int register_xfrm_state_bpf(void) +{ + return 0; +} +#endif + #endif /* _NET_XFRM_H */ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index b0bdff26fc88..99dd7376df6a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -12,6 +12,7 @@ struct xsk_buff_pool; struct xdp_rxq_info; +struct xsk_cb_desc; struct xsk_queue; struct xdp_desc; struct xdp_umem; @@ -33,6 +34,7 @@ struct xdp_buff_xsk { }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) +#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) struct xsk_dma_map { dma_addr_t *dma_pages; @@ -77,10 +79,12 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool dma_need_sync; bool unaligned; + bool tx_sw_csum; void *addrs; /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when @@ -132,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); +void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); @@ -233,4 +238,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } +static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) +{ + return pool->tx_metadata_len > 0; +} + #endif /* XSK_BUFF_POOL_H_ */ |