diff options
Diffstat (limited to 'include/net')
64 files changed, 1767 insertions, 539 deletions
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 78ebcf782ce5..4f785098c67a 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -207,6 +207,8 @@ int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); +struct netfs_io_subrequest; +void p9_client_write_subreq(struct netfs_io_subrequest *subreq); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); int p9dirent_read(struct p9_client *clnt, char *buf, int len, struct p9_dirent *dirent); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9d06eb945509..62a407db1bf5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) refcount_inc(&ifp->refcnt); } +static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) +{ + return refcount_inc_not_zero(&ifp->refcnt); +} /* * compute link-local solicited-node multicast address diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 627ea8e2d915..b6eedf7650da 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -17,14 +17,31 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) } #endif -extern spinlock_t unix_gc_lock; extern unsigned int unix_tot_inflight; - -void unix_inflight(struct user_struct *user, struct file *fp); -void unix_notinflight(struct user_struct *user, struct file *fp); +void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver); +void unix_del_edges(struct scm_fp_list *fpl); +void unix_update_edges(struct unix_sock *receiver); +int unix_prepare_fpl(struct scm_fp_list *fpl); +void unix_destroy_fpl(struct scm_fp_list *fpl); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); +struct unix_vertex { + struct list_head edges; + struct list_head entry; + struct list_head scc_entry; + unsigned long out_degree; + unsigned long index; + unsigned long scc_index; +}; + +struct unix_edge { + struct unix_sock *predecessor; + struct unix_sock *successor; + struct list_head vertex_entry; + struct list_head stack_entry; +}; + struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) @@ -50,6 +67,7 @@ struct unix_skb_parms { struct scm_stat { atomic_t nr_fds; + unsigned long nr_unix_fds; }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) @@ -62,12 +80,9 @@ struct unix_sock { struct path path; struct mutex iolock, bindlock; struct sock *peer; - struct list_head link; - unsigned long inflight; + struct sock *listener; + struct unix_vertex *vertex; spinlock_t lock; - unsigned long gc_flags; -#define UNIX_GC_CANDIDATE 0 -#define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; @@ -85,6 +100,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. + */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/include/net/ax25.h b/include/net/ax25.h index 0d939e5aee4e..cb622d84cd0c 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -139,7 +139,9 @@ enum { AX25_VALUES_N2, /* Default N2 value */ AX25_VALUES_PACLEN, /* AX.25 MTU */ AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave, DAMA Master */ +#ifdef CONFIG_AX25_DAMA_SLAVE AX25_VALUES_DS_TIMEOUT, /* DAMA Slave timeout */ +#endif AX25_MAX_VALUES /* THIS MUST REMAIN THE LAST ENTRY OF THIS LIST */ }; @@ -216,7 +218,7 @@ typedef struct { struct ctl_table; typedef struct ax25_dev { - struct ax25_dev *next; + struct list_head list; struct net_device *dev; netdevice_tracker dev_tracker; @@ -330,7 +332,6 @@ int ax25_addr_size(const ax25_digi *); void ax25_digi_invert(const ax25_digi *, ax25_digi *); /* ax25_dev.c */ -extern ax25_dev *ax25_dev_list; extern spinlock_t ax25_dev_lock; #if IS_ENABLED(CONFIG_AX25) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 9fe95a22abeb..b3228bd6cd6b 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -285,7 +285,7 @@ void bt_err_ratelimited(const char *fmt, ...); bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ -enum { +enum bt_sock_state { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ BT_OPEN, BT_BOUND, @@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, return skb; } +static inline int bt_copy_from_sockptr(void *dst, size_t dst_size, + sockptr_t src, size_t src_size) +{ + if (dst_size > src_size) + return -EINVAL; + + return copy_from_sockptr(dst, src, dst_size); +} + int bt_to_errno(u16 code); __u8 bt_status(int err); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8701ca5f31ee..fe932ca3bc8c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -33,9 +33,6 @@ #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) #define HCI_LINK_KEY_SIZE 16 -#define HCI_AMP_LINK_KEY_SIZE (2 * HCI_LINK_KEY_SIZE) - -#define HCI_MAX_AMP_ASSOC_SIZE 672 #define HCI_MAX_CPB_DATA_SIZE 252 @@ -71,26 +68,6 @@ #define HCI_SMD 9 #define HCI_VIRTIO 10 -/* HCI controller types */ -#define HCI_PRIMARY 0x00 -#define HCI_AMP 0x01 - -/* First BR/EDR Controller shall have ID = 0 */ -#define AMP_ID_BREDR 0x00 - -/* AMP controller types */ -#define AMP_TYPE_BREDR 0x00 -#define AMP_TYPE_80211 0x01 - -/* AMP controller status */ -#define AMP_STATUS_POWERED_DOWN 0x00 -#define AMP_STATUS_BLUETOOTH_ONLY 0x01 -#define AMP_STATUS_NO_CAPACITY 0x02 -#define AMP_STATUS_LOW_CAPACITY 0x03 -#define AMP_STATUS_MEDIUM_CAPACITY 0x04 -#define AMP_STATUS_HIGH_CAPACITY 0x05 -#define AMP_STATUS_FULL_CAPACITY 0x06 - /* HCI device quirks */ enum { /* When this quirk is set, the HCI Reset command is send when @@ -176,6 +153,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. @@ -447,7 +433,6 @@ enum { #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ -#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 @@ -519,7 +504,6 @@ enum { #define ESCO_LINK 0x02 /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 -#define AMP_LINK 0x81 #define ISO_LINK 0x82 #define INVALID_LINK 0xff @@ -935,56 +919,6 @@ struct hci_cp_io_capability_neg_reply { __u8 reason; } __packed; -#define HCI_OP_CREATE_PHY_LINK 0x0435 -struct hci_cp_create_phy_link { - __u8 phy_handle; - __u8 key_len; - __u8 key_type; - __u8 key[HCI_AMP_LINK_KEY_SIZE]; -} __packed; - -#define HCI_OP_ACCEPT_PHY_LINK 0x0436 -struct hci_cp_accept_phy_link { - __u8 phy_handle; - __u8 key_len; - __u8 key_type; - __u8 key[HCI_AMP_LINK_KEY_SIZE]; -} __packed; - -#define HCI_OP_DISCONN_PHY_LINK 0x0437 -struct hci_cp_disconn_phy_link { - __u8 phy_handle; - __u8 reason; -} __packed; - -struct ext_flow_spec { - __u8 id; - __u8 stype; - __le16 msdu; - __le32 sdu_itime; - __le32 acc_lat; - __le32 flush_to; -} __packed; - -#define HCI_OP_CREATE_LOGICAL_LINK 0x0438 -#define HCI_OP_ACCEPT_LOGICAL_LINK 0x0439 -struct hci_cp_create_accept_logical_link { - __u8 phy_handle; - struct ext_flow_spec tx_flow_spec; - struct ext_flow_spec rx_flow_spec; -} __packed; - -#define HCI_OP_DISCONN_LOGICAL_LINK 0x043a -struct hci_cp_disconn_logical_link { - __le16 log_handle; -} __packed; - -#define HCI_OP_LOGICAL_LINK_CANCEL 0x043b -struct hci_cp_logical_link_cancel { - __u8 phy_handle; - __u8 flow_spec_id; -} __packed; - #define HCI_OP_ENHANCED_SETUP_SYNC_CONN 0x043d struct hci_coding_format { __u8 id; @@ -1606,46 +1540,6 @@ struct hci_rp_read_enc_key_size { __u8 key_size; } __packed; -#define HCI_OP_READ_LOCAL_AMP_INFO 0x1409 -struct hci_rp_read_local_amp_info { - __u8 status; - __u8 amp_status; - __le32 total_bw; - __le32 max_bw; - __le32 min_latency; - __le32 max_pdu; - __u8 amp_type; - __le16 pal_cap; - __le16 max_assoc_size; - __le32 max_flush_to; - __le32 be_flush_to; -} __packed; - -#define HCI_OP_READ_LOCAL_AMP_ASSOC 0x140a -struct hci_cp_read_local_amp_assoc { - __u8 phy_handle; - __le16 len_so_far; - __le16 max_len; -} __packed; -struct hci_rp_read_local_amp_assoc { - __u8 status; - __u8 phy_handle; - __le16 rem_len; - __u8 frag[]; -} __packed; - -#define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b -struct hci_cp_write_remote_amp_assoc { - __u8 phy_handle; - __le16 len_so_far; - __le16 rem_len; - __u8 frag[]; -} __packed; -struct hci_rp_write_remote_amp_assoc { - __u8 status; - __u8 phy_handle; -} __packed; - #define HCI_OP_GET_MWS_TRANSPORT_CONFIG 0x140c #define HCI_OP_ENABLE_DUT_MODE 0x1803 @@ -1657,6 +1551,15 @@ struct hci_cp_le_set_event_mask { __u8 mask[8]; } __packed; +/* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 4, Part E + * 7.8.2 LE Read Buffer Size command + * MAX_LE_MTU is 0xffff. + * 0 is also valid. It means that no dedicated LE Buffer exists. + * It should use the HCI_Read_Buffer_Size command and mtu is shared + * between BR/EDR and LE. + */ +#define HCI_MIN_LE_MTU 0x001b + #define HCI_OP_LE_READ_BUFFER_SIZE 0x2002 struct hci_rp_le_read_buffer_size { __u8 status; @@ -2017,7 +1920,7 @@ struct hci_cp_le_set_ext_adv_data { __u8 operation; __u8 frag_pref; __u8 length; - __u8 data[]; + __u8 data[] __counted_by(length); } __packed; #define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA 0x2038 @@ -2026,7 +1929,7 @@ struct hci_cp_le_set_ext_scan_rsp_data { __u8 operation; __u8 frag_pref; __u8 length; - __u8 data[]; + __u8 data[] __counted_by(length); } __packed; #define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039 @@ -2052,7 +1955,7 @@ struct hci_cp_le_set_per_adv_data { __u8 handle; __u8 operation; __u8 length; - __u8 data[]; + __u8 data[] __counted_by(length); } __packed; #define HCI_OP_LE_SET_PER_ADV_ENABLE 0x2040 @@ -2135,7 +2038,7 @@ struct hci_cp_le_set_cig_params { __le16 c_latency; __le16 p_latency; __u8 num_cis; - struct hci_cis_params cis[]; + struct hci_cis_params cis[] __counted_by(num_cis); } __packed; struct hci_rp_le_set_cig_params { @@ -2153,7 +2056,7 @@ struct hci_cis { struct hci_cp_le_create_cis { __u8 num_cis; - struct hci_cis cis[]; + struct hci_cis cis[] __counted_by(num_cis); } __packed; #define HCI_OP_LE_REMOVE_CIG 0x2065 @@ -2207,7 +2110,7 @@ struct hci_cp_le_big_create_sync { __u8 mse; __le16 timeout; __u8 num_bis; - __u8 bis[]; + __u8 bis[] __counted_by(num_bis); } __packed; #define HCI_OP_LE_BIG_TERM_SYNC 0x206c diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 56fb42df44a3..9231396fe96f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -126,7 +126,6 @@ enum suspended_state { struct hci_conn_hash { struct list_head list; unsigned int acl_num; - unsigned int amp_num; unsigned int sco_num; unsigned int iso_num; unsigned int le_num; @@ -247,6 +246,7 @@ struct adv_info { bool periodic; __u8 mesh; __u8 instance; + __u8 handle; __u32 flags; __u16 timeout; __u16 remaining_time; @@ -341,14 +341,6 @@ struct adv_monitor { /* Default authenticated payload timeout 30s */ #define DEFAULT_AUTH_PAYLOAD_TIMEOUT 0x0bb8 -struct amp_assoc { - __u16 len; - __u16 offset; - __u16 rem_len; - __u16 len_so_far; - __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; -}; - #define HCI_MAX_PAGES 3 struct hci_dev { @@ -361,7 +353,6 @@ struct hci_dev { unsigned long flags; __u16 id; __u8 bus; - __u8 dev_type; bdaddr_t bdaddr; bdaddr_t setup_addr; bdaddr_t public_addr; @@ -467,21 +458,6 @@ struct hci_dev { __u16 sniff_min_interval; __u16 sniff_max_interval; - __u8 amp_status; - __u32 amp_total_bw; - __u32 amp_max_bw; - __u32 amp_min_latency; - __u32 amp_max_pdu; - __u8 amp_type; - __u16 amp_pal_cap; - __u16 amp_assoc_size; - __u32 amp_max_flush_to; - __u32 amp_be_flush_to; - - struct amp_assoc loc_assoc; - - __u8 flow_ctl_mode; - unsigned int auto_accept_delay; unsigned long quirks; @@ -501,11 +477,6 @@ struct hci_dev { unsigned int le_pkts; unsigned int iso_pkts; - __u16 block_len; - __u16 block_mtu; - __u16 num_blocks; - __u16 block_cnt; - unsigned long acl_last_tx; unsigned long sco_last_tx; unsigned long le_last_tx; @@ -706,6 +677,7 @@ struct hci_conn { __u16 handle; __u16 sync_handle; __u16 state; + __u16 mtu; __u8 mode; __u8 type; __u8 role; @@ -738,6 +710,8 @@ struct hci_conn { __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; __u16 le_per_adv_data_len; __u16 le_per_adv_data_offset; + __u8 le_adv_phy; + __u8 le_adv_sec_phy; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -775,7 +749,6 @@ struct hci_conn { void *l2cap_data; void *sco_data; void *iso_data; - struct amp_mgr *amp_mgr; struct list_head link_list; struct hci_conn *parent; @@ -802,7 +775,6 @@ struct hci_chan { struct sk_buff_head data_q; unsigned int sent; __u8 state; - bool amp; }; struct hci_conn_params { @@ -1011,9 +983,6 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ACL_LINK: h->acl_num++; break; - case AMP_LINK: - h->amp_num++; - break; case LE_LINK: h->le_num++; if (c->role == HCI_ROLE_SLAVE) @@ -1040,9 +1009,6 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ACL_LINK: h->acl_num--; break; - case AMP_LINK: - h->amp_num--; - break; case LE_LINK: h->le_num--; if (c->role == HCI_ROLE_SLAVE) @@ -1064,8 +1030,6 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) switch (type) { case ACL_LINK: return h->acl_num; - case AMP_LINK: - return h->amp_num; case LE_LINK: return h->le_num; case SCO_LINK: @@ -1082,7 +1046,7 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; + return c->acl_num + c->sco_num + c->le_num + c->iso_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) @@ -1371,8 +1335,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || - !test_bit(HCI_CONN_PA_SYNC, &c->flags)) + if (c->type != ISO_LINK) continue; if (c->sync_handle == sync_handle) { @@ -1512,7 +1475,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy); void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, @@ -1608,10 +1571,6 @@ static inline void hci_conn_drop(struct hci_conn *conn) } break; - case AMP_LINK: - timeo = conn->disc_timeout; - break; - default: timeo = 0; break; @@ -1905,6 +1864,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) +#define read_key_size_capable(dev) \ + ((dev)->commands[20] & 0x10 && \ + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ @@ -2229,8 +2192,22 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); /* These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. */ -#define DISCOV_LE_SCAN_WIN 0x12 -#define DISCOV_LE_SCAN_INT 0x12 +#define DISCOV_LE_SCAN_WIN 0x0012 /* 11.25 msec */ +#define DISCOV_LE_SCAN_INT 0x0012 /* 11.25 msec */ +#define DISCOV_LE_SCAN_INT_FAST 0x0060 /* 60 msec */ +#define DISCOV_LE_SCAN_WIN_FAST 0x0030 /* 30 msec */ +#define DISCOV_LE_SCAN_INT_CONN 0x0060 /* 60 msec */ +#define DISCOV_LE_SCAN_WIN_CONN 0x0060 /* 60 msec */ +#define DISCOV_LE_SCAN_INT_SLOW1 0x0800 /* 1.28 sec */ +#define DISCOV_LE_SCAN_WIN_SLOW1 0x0012 /* 11.25 msec */ +#define DISCOV_LE_SCAN_INT_SLOW2 0x1000 /* 2.56 sec */ +#define DISCOV_LE_SCAN_WIN_SLOW2 0x0024 /* 22.5 msec */ +#define DISCOV_CODED_SCAN_INT_FAST 0x0120 /* 180 msec */ +#define DISCOV_CODED_SCAN_WIN_FAST 0x0090 /* 90 msec */ +#define DISCOV_CODED_SCAN_INT_SLOW1 0x1800 /* 3.84 sec */ +#define DISCOV_CODED_SCAN_WIN_SLOW1 0x0036 /* 33.75 msec */ +#define DISCOV_CODED_SCAN_INT_SLOW2 0x3000 /* 7.68 sec */ +#define DISCOV_CODED_SCAN_WIN_SLOW2 0x006c /* 67.5 msec */ #define DISCOV_LE_TIMEOUT 10240 /* msec */ #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index a4278aa618ab..5cfdc813491a 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -463,18 +463,24 @@ struct l2cap_le_credits { #define L2CAP_ECRED_MAX_CID 5 struct l2cap_ecred_conn_req { - __le16 psm; - __le16 mtu; - __le16 mps; - __le16 credits; + /* New members must be added within the struct_group() macro below. */ + __struct_group(l2cap_ecred_conn_req_hdr, hdr, __packed, + __le16 psm; + __le16 mtu; + __le16 mps; + __le16 credits; + ); __le16 scid[]; } __packed; struct l2cap_ecred_conn_rsp { - __le16 mtu; - __le16 mps; - __le16 credits; - __le16 result; + /* New members must be added within the struct_group() macro below. */ + struct_group_tagged(l2cap_ecred_conn_rsp_hdr, hdr, + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 result; + ); __le16 dcid[]; }; @@ -548,6 +554,9 @@ struct l2cap_chan { __u16 tx_credits; __u16 rx_credits; + /* estimated available receive buffer space or -1 if unknown */ + ssize_t rx_avail; + __u8 tx_state; __u8 rx_state; @@ -682,10 +691,15 @@ struct l2cap_user { /* ----- L2CAP socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) +struct l2cap_rx_busy { + struct list_head list; + struct sk_buff *skb; +}; + struct l2cap_pinfo { struct bt_sock bt; struct l2cap_chan *chan; - struct sk_buff *rx_busy_skb; + struct list_head rx_busy; }; enum { @@ -943,6 +957,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); +void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail); int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator); void l2cap_chan_set_defaults(struct l2cap_chan *chan); int l2cap_ertm_init(struct l2cap_chan *chan); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2e2be4fd2bb6..cbf1664dc569 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1149,6 +1149,8 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * @band_mask: which bands to check on * @prohibited_flags: which channels to not consider usable, * %IEEE80211_CHAN_DISABLED is always taken into account + * + * Return: %true if usable channels found, %false otherwise */ bool cfg80211_any_usable_channels(struct wiphy *wiphy, unsigned long band_mask, @@ -1575,6 +1577,8 @@ struct cfg80211_csa_settings { * @beacon_next: beacon data to be used after the color change * @count: number of beacons until the color change * @color: the color used after the change + * @link_id: defines the link on which color change is expected during MLO. + * 0 in case of non-MLO. */ struct cfg80211_color_change_settings { struct cfg80211_beacon_data beacon_color_change; @@ -1583,6 +1587,7 @@ struct cfg80211_color_change_settings { struct cfg80211_beacon_data beacon_next; u8 count; u8 color; + u8 link_id; }; /** @@ -1833,9 +1838,11 @@ enum cfg80211_station_type { * * Utility function for the @change_station driver method. Call this function * with the appropriate station type looking up the station (and checking that - * it exists). It will verify whether the station change is acceptable, and if - * not will return an error code. Note that it may modify the parameters for - * backward compatibility reasons, so don't use them before calling this. + * it exists). It will verify whether the station change is acceptable. + * + * Return: 0 if the change is acceptable, otherwise an error code. Note that + * it may modify the parameters for backward compatibility reasons, so don't + * use them before calling this. */ int cfg80211_check_station_change(struct wiphy *wiphy, struct station_parameters *params, @@ -2229,7 +2236,7 @@ struct cfg80211_sar_capa { * @mac_addr: the mac address of the station of interest * @sinfo: pointer to the structure to fill with the information * - * Returns 0 on success and sinfo is filled with the available information + * Return: 0 on success and sinfo is filled with the available information * otherwise returns a negative error code and the content of sinfo has to be * considered undefined. */ @@ -4991,6 +4998,7 @@ struct cfg80211_ops { * set this flag to update channels on beacon hints. * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link * of an NSTR mobile AP MLD. + * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -5002,6 +5010,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), + WIPHY_FLAG_DISABLE_WEXT = BIT(9), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), @@ -5332,6 +5341,8 @@ struct wiphy_iftype_ext_capab { * cfg80211_get_iftype_ext_capa - lookup interface type extended capability * @wiphy: the wiphy to look up from * @type: the interface type to look up + * + * Return: The extended capability for the given interface @type, may be %NULL */ const struct wiphy_iftype_ext_capab * cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type); @@ -5902,6 +5913,10 @@ int wiphy_register(struct wiphy *wiphy); /** * get_wiphy_regdom - get custom regdomain for the given wiphy * @wiphy: the wiphy to get the regdomain from + * + * Context: Requires any of RTNL, wiphy mutex or RCU protection. + * + * Return: pointer to the regulatory domain associated with the wiphy */ const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); @@ -6419,6 +6434,8 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) * * The Preferred Scanning Channels (PSC) are defined in * Draft IEEE P802.11ax/D5.0, 26.17.2.3.3 + * + * Return: %true if channel is a PSC, %false otherwise */ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) { @@ -6448,8 +6465,8 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, * ieee80211_mandatory_rates - get mandatory rates for a given band * @sband: the band to look for rates in * - * This function returns a bitmap of the mandatory rates for the given - * band, bits are set according to the rate position in the bitrates array. + * Return: a bitmap of the mandatory rates for the given band, bits + * are set according to the rate position in the bitrates array. */ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband); @@ -6663,6 +6680,8 @@ bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto); * header to the ethernet header (if present). * * @skb: The 802.3 frame with embedded mesh header + * + * Return: 0 on success. Non-zero on error. */ int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb); @@ -7041,6 +7060,8 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, * * You can use this to map the regulatory request initiator enum to a * proper string representation. + * + * Return: pointer to string representation of the initiator */ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); @@ -7049,6 +7070,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * @wiphy: wiphy for which pre-CAC capability is checked. * * Pre-CAC is allowed only in some regdomains (notable ETSI). + * + * Return: %true if allowed, %false otherwise */ bool regulatory_pre_cac_allowed(struct wiphy *wiphy); @@ -7183,6 +7206,8 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check * @non_inherit_element: non inheritance element + * + * Return: %true if should be inherited, %false otherwise */ bool cfg80211_is_element_inherited(const struct element *element, const struct element *non_inherit_element); @@ -7195,6 +7220,8 @@ bool cfg80211_is_element_inherited(const struct element *element, * @sub_elem: current MBSSID subelement (profile) * @merged_ie: location of the merged profile * @max_copy_len: max merged profile length + * + * Return: the number of bytes merged */ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, const struct element *mbssid_elem, @@ -7222,7 +7249,7 @@ enum cfg80211_bss_frame_type { * @ielen: length of IEs * @band: enum nl80211_band of the channel * - * Returns the channel number, or -1 if none could be determined. + * Return: the channel number, or -1 if none could be determined. */ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, enum nl80211_band band); @@ -7300,6 +7327,8 @@ cfg80211_inform_bss(struct wiphy *wiphy, * @bss_type: type of BSS, see &enum ieee80211_bss_type * @privacy: privacy filter, see &enum ieee80211_privacy * @use_for: indicates which use is intended + * + * Return: Reference-counted BSS on success. %NULL on error. */ struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -7320,6 +7349,8 @@ struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, * @privacy: privacy filter, see &enum ieee80211_privacy * * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL. + * + * Return: Reference-counted BSS on success. %NULL on error. */ static inline struct cfg80211_bss * cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -7704,8 +7735,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb); * cfg80211_vendor_cmd_get_sender - get the current sender netlink ID * @wiphy: the wiphy * - * Return the current netlink port ID in a vendor command handler. - * Valid to call only there. + * Return: the current netlink port ID in a vendor command handler. + * + * Context: May only be called from a vendor command handler */ unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy); @@ -8258,6 +8290,8 @@ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, * * @sinfo: the station information * @gfp: allocation flags + * + * Return: 0 on success. Non-zero on error. */ int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp); @@ -8775,13 +8809,13 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * also checks if IR-relaxation conditions apply, to allow beaconing under * more permissive conditions. * - * Requires the wiphy mutex to be held. + * Context: Requires the wiphy mutex to be held. */ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); -/* +/** * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels * @chandef: the new channel definition @@ -8794,7 +8828,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id); -/* +/** * cfg80211_ch_switch_started_notify - notify channel switch start * @dev: the device on which the channel switch started * @chandef: the future channel definition @@ -8817,7 +8851,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, * @operating_class: the operating class to convert * @band: band pointer to fill * - * Returns %true if the conversion was successful, %false otherwise. + * Return: %true if the conversion was successful, %false otherwise. */ bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band); @@ -8829,7 +8863,7 @@ bool ieee80211_operating_class_to_band(u8 operating_class, * @chan: the ieee80211_channel to convert * @chandef: a pointer to the resulting chandef * - * Returns %true if the conversion was successful, %false otherwise. + * Return: %true if the conversion was successful, %false otherwise. */ bool ieee80211_operating_class_to_chandef(u8 operating_class, struct ieee80211_channel *chan, @@ -8841,7 +8875,7 @@ bool ieee80211_operating_class_to_chandef(u8 operating_class, * @chandef: the chandef to convert * @op_class: a pointer to the resulting operating class * - * Returns %true if the conversion was successful, %false otherwise. + * Return: %true if the conversion was successful, %false otherwise. */ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class); @@ -8851,7 +8885,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, * * @chandef: the chandef to convert * - * Returns the center frequency of chandef (1st segment) in KHz. + * Return: the center frequency of chandef (1st segment) in KHz. */ static inline u32 ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef) @@ -8859,7 +8893,7 @@ ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef) return MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset; } -/* +/** * cfg80211_tdls_oper_request - request userspace to perform TDLS operation * @dev: the device on which the operation is requested * @peer: the MAC address of the peer device @@ -8878,11 +8912,11 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp); -/* +/** * cfg80211_calculate_bitrate - calculate actual bitrate (in 100Kbps units) * @rate: given rate_info to calculate bitrate from * - * return 0 if MCS index >= 32 + * Return: calculated bitrate */ u32 cfg80211_calculate_bitrate(struct rate_info *rate); @@ -8896,7 +8930,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); * when the driver wishes to unregister the wdev, e.g. when the hardware device * is unbound from the driver. * - * Requires the RTNL and wiphy mutex to be held. + * Context: Requires the RTNL and wiphy mutex to be held. */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); @@ -8909,7 +8943,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev); * held. Otherwise, both register_netdevice() and register_netdev() are usable * instead as well. * - * Requires the RTNL and wiphy mutex to be held. + * Context: Requires the RTNL and wiphy mutex to be held. + * + * Return: 0 on success. Non-zero on error. */ int cfg80211_register_netdevice(struct net_device *dev); @@ -8922,7 +8958,7 @@ int cfg80211_register_netdevice(struct net_device *dev); * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are * usable instead as well. * - * Requires the RTNL and wiphy mutex to be held. + * Context: Requires the RTNL and wiphy mutex to be held. */ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { @@ -8998,9 +9034,9 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, * correctly, if not the result of using this function will not * be ordered correctly either, i.e. it does no reordering. * - * The function returns the offset where the next part of the - * buffer starts, which may be @ielen if the entire (remainder) - * of the buffer should be used. + * Return: The offset where the next part of the buffer starts, which + * may be @ielen if the entire (remainder) of the buffer should be + * used. */ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, @@ -9028,9 +9064,9 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * correctly, if not the result of using this function will not * be ordered correctly either, i.e. it does no reordering. * - * The function returns the offset where the next part of the - * buffer starts, which may be @ielen if the entire (remainder) - * of the buffer should be used. + * Return: The offset where the next part of the buffer starts, which + * may be @ielen if the entire (remainder) of the buffer should be + * used. */ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, size_t offset) @@ -9094,6 +9130,8 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. + * + * Return: 0 if combinations are allowed. Non-zero on error. */ int cfg80211_check_combinations(struct wiphy *wiphy, struct iface_combination_params *params); @@ -9109,6 +9147,8 @@ int cfg80211_check_combinations(struct wiphy *wiphy, * This function can be called by the driver to check what possible * combinations it fits in at a given moment, e.g. for channel switching * purposes. + * + * Return: 0 on success. Non-zero on error. */ int cfg80211_iter_combinations(struct wiphy *wiphy, struct iface_combination_params *params, @@ -9116,7 +9156,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void *data), void *data); -/* +/** * cfg80211_stop_iface - trigger interface disconnection * * @wiphy: the wiphy @@ -9171,6 +9211,8 @@ static inline void wiphy_ext_feature_set(struct wiphy *wiphy, * * The extended features are flagged in multiple bytes (see * &struct wiphy.@ext_features) + * + * Return: %true if extended feature flag is set, %false otherwise */ static inline bool wiphy_ext_feature_isset(struct wiphy *wiphy, @@ -9292,6 +9334,8 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, * Check whether the interface is allowed to operate; additionally, this API * can be used to check iftype against the software interfaces when * check_swif is '1'. + * + * Return: %true if allowed, %false otherwise */ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); @@ -9384,60 +9428,78 @@ void cfg80211_bss_flush(struct wiphy *wiphy); * @cmd: the actual event we want to notify * @count: the number of TBTTs until the color change happens * @color_bitmap: representations of the colors that the local BSS is aware of + * @link_id: valid link_id in case of MLO or 0 for non-MLO. + * + * Return: 0 on success. Non-zero on error. */ int cfg80211_bss_color_notify(struct net_device *dev, enum nl80211_commands cmd, u8 count, - u64 color_bitmap); + u64 color_bitmap, u8 link_id); /** * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of + * @link_id: valid link_id in case of MLO or 0 for non-MLO. + * + * Return: 0 on success. Non-zero on error. */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap) + u64 color_bitmap, + u8 link_id) { return cfg80211_bss_color_notify(dev, NL80211_CMD_OBSS_COLOR_COLLISION, - 0, color_bitmap); + 0, color_bitmap, link_id); } /** * cfg80211_color_change_started_notify - notify color change start * @dev: the device on which the color is switched * @count: the number of TBTTs until the color change happens + * @link_id: valid link_id in case of MLO or 0 for non-MLO. * * Inform the userspace about the color change that has started. + * + * Return: 0 on success. Non-zero on error. */ static inline int cfg80211_color_change_started_notify(struct net_device *dev, - u8 count) + u8 count, u8 link_id) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_STARTED, - count, 0); + count, 0, link_id); } /** * cfg80211_color_change_aborted_notify - notify color change abort * @dev: the device on which the color is switched + * @link_id: valid link_id in case of MLO or 0 for non-MLO. * * Inform the userspace about the color change that has aborted. + * + * Return: 0 on success. Non-zero on error. */ -static inline int cfg80211_color_change_aborted_notify(struct net_device *dev) +static inline int cfg80211_color_change_aborted_notify(struct net_device *dev, + u8 link_id) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_ABORTED, - 0, 0); + 0, 0, link_id); } /** * cfg80211_color_change_notify - notify color change completion * @dev: the device on which the color was switched + * @link_id: valid link_id in case of MLO or 0 for non-MLO. * * Inform the userspace about the color change that has completed. + * + * Return: 0 on success. Non-zero on error. */ -static inline int cfg80211_color_change_notify(struct net_device *dev) +static inline int cfg80211_color_change_notify(struct net_device *dev, + u8 link_id) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_COMPLETED, - 0, 0); + 0, 0, link_id); } /** @@ -9475,6 +9537,8 @@ void cfg80211_schedule_channels_check(struct wireless_dev *wdev); * @ppos: read position * @handler: the read handler to call (under wiphy lock) * @data: additional data to pass to the read handler + * + * Return: the number of characters read, or a negative errno */ ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, @@ -9497,6 +9561,8 @@ ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, * @count: read count * @handler: the write handler to call (under wiphy lock) * @data: additional data to pass to the write handler + * + * Return: the number of characters written, or a negative errno */ ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 53dd7d988a2d..c9111bb2f59b 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -183,7 +183,8 @@ int cipso_v4_getattr(const unsigned char *cipso, struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr); + const struct netlbl_lsm_secattr *secattr, + bool sk_locked); void cipso_v4_sock_delattr(struct sock *sk); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_req_setattr(struct request_sock *req, @@ -214,7 +215,8 @@ static inline int cipso_v4_getattr(const unsigned char *cipso, static inline int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { return -ENOSYS; } diff --git a/include/net/devlink.h b/include/net/devlink.h index 9ac394bdfbe4..35eb0f884386 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -483,7 +483,8 @@ struct devlink_param { int (*get)(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx); int (*set)(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int (*validate)(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack); @@ -599,12 +600,14 @@ enum devlink_param_generic_id { .validate = _validate, \ } -/* Part number, identifier of board design */ +/* Identifier of board design */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id" /* Revision of board design */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev" /* Maker of the board */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" +/* Part number of the board and its components */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_PART_NUMBER "board.part_number" /* Part number, identifier of asic design */ #define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID "asic.id" @@ -1602,6 +1605,14 @@ void devlink_free(struct devlink *devlink); * capability. Should be used by device drivers to * enable/disable ipsec_packet capability of a * function managed by the devlink port. + * @port_fn_max_io_eqs_get: Callback used to get port function's maximum number + * of event queues. Should be used by device drivers to + * report the maximum event queues of a function + * managed by the devlink port. + * @port_fn_max_io_eqs_set: Callback used to set port function's maximum number + * of event queues. Should be used by device drivers to + * configure maximum number of event queues + * of a function managed by the devlink port. * * Note: Driver should return -EOPNOTSUPP if it doesn't support * port function (@port_fn_*) handling for a particular port. @@ -1651,6 +1662,12 @@ struct devlink_port_ops { int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack); + int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port, + u32 *max_eqs, + struct netlink_ext_ack *extack); + int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port, + u32 max_eqs, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/include/net/dsa.h b/include/net/dsa.h index 7c0da9effe4e..b60e7e410aba 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -24,9 +24,6 @@ struct dsa_8021q_context; struct tc_action; -struct phy_device; -struct fixed_phy_status; -struct phylink_link_state; #define DSA_TAG_PROTO_NONE_VALUE 0 #define DSA_TAG_PROTO_BRCM_VALUE 1 @@ -327,6 +324,12 @@ struct dsa_port { }; }; +static inline struct dsa_port * +dsa_phylink_to_port(struct phylink_config *config) +{ + return container_of(config, struct dsa_port, pl_config); +} + /* TODO: ideally DSA ports would have a single dp->link_dp member, * and no dst->rtable nor this struct dsa_link would be needed, * but this would require some more complex tree walking, @@ -430,6 +433,11 @@ struct dsa_switch { */ u32 fdb_isolation:1; + /* Drivers that have global DSCP mapping settings must set this to + * true to automatically apply the settings to all ports. + */ + u32 dscp_prio_mapping_is_global:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -452,6 +460,11 @@ struct dsa_switch { const struct dsa_switch_ops *ops; /* + * Allow a DSA switch driver to override the phylink MAC ops + */ + const struct phylink_mac_ops *phylink_mac_ops; + + /* * User mii_bus and devices for the individual ports. */ u32 phys_mii_mask; @@ -578,6 +591,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_user((_dp))) +#define dsa_switch_for_each_user_port_continue_reverse(_dp, _ds) \ + dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ + if (dsa_port_is_user((_dp))) + #define dsa_switch_for_each_cpu_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) @@ -858,14 +875,6 @@ struct dsa_switch_ops { int regnum, u16 val); /* - * Link state adjustment (called from libphy) - */ - void (*adjust_link)(struct dsa_switch *ds, int port, - struct phy_device *phydev); - void (*fixed_link_update)(struct dsa_switch *ds, int port, - struct fixed_phy_status *st); - - /* * PHYLINK integration */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, @@ -955,6 +964,10 @@ struct dsa_switch_ops { u8 prio); int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, u8 prio); + int (*port_set_apptrust)(struct dsa_switch *ds, int port, + const u8 *sel, int nsel); + int (*port_get_apptrust)(struct dsa_switch *ds, int port, u8 *sel, + int *nsel); /* * Suspend and resume @@ -1247,7 +1260,8 @@ struct dsa_switch_ops { int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx); int dsa_devlink_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int dsa_devlink_params_register(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); diff --git a/include/net/dscp.h b/include/net/dscp.h new file mode 100644 index 000000000000..ba40540868c9 --- /dev/null +++ b/include/net/dscp.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> */ + +#ifndef __DSCP_H__ +#define __DSCP_H__ + +/* + * DSCP Pools and Codepoint Space Division: + * + * The Differentiated Services (Diffserv) architecture defines a method for + * classifying and managing network traffic using the DS field in IPv4 and IPv6 + * packet headers. This field can carry one of 64 distinct DSCP (Differentiated + * Services Code Point) values, which are divided into three pools based on + * their Least Significant Bits (LSB) patterns and intended usage. Each pool has + * a specific registration procedure for assigning DSCP values: + * + * Pool 1 (Standards Action Pool): + * - Codepoint Space: xxxxx0 + * This pool includes DSCP values ending in '0' (binary), allocated via + * Standards Action. It is intended for globally recognized traffic classes, + * ensuring interoperability across the internet. This pool encompasses + * well-known DSCP values such as CS0-CS7, AFxx, EF, and VOICE-ADMIT. + * + * Pool 2 (Experimental/Local Use Pool): + * - Codepoint Space: xxxx11 + * Reserved for DSCP values ending in '11' (binary), this pool is designated + * for Experimental or Local Use. It allows for private or temporary traffic + * marking schemes not intended for standardized global use, facilitating + * testing and network-specific configurations without impacting + * interoperability. + * + * Pool 3 (Preferential Standardization Pool): + * - Codepoint Space: xxxx01 + * Initially reserved for experimental or local use, this pool now serves as + * a secondary standardization resource should Pool 1 become exhausted. DSCP + * values ending in '01' (binary) are assigned via Standards Action, with a + * focus on adopting new, standardized traffic classes as the need arises. + * + * For pool updates see: + * https://www.iana.org/assignments/dscp-registry/dscp-registry.xhtml + */ + +/* Pool 1: Standardized DSCP values as per [RFC8126] */ +#define DSCP_CS0 0 /* 000000, [RFC2474] */ +/* CS0 is some times called default (DF) */ +#define DSCP_DF 0 /* 000000, [RFC2474] */ +#define DSCP_CS1 8 /* 001000, [RFC2474] */ +#define DSCP_CS2 16 /* 010000, [RFC2474] */ +#define DSCP_CS3 24 /* 011000, [RFC2474] */ +#define DSCP_CS4 32 /* 100000, [RFC2474] */ +#define DSCP_CS5 40 /* 101000, [RFC2474] */ +#define DSCP_CS6 48 /* 110000, [RFC2474] */ +#define DSCP_CS7 56 /* 111000, [RFC2474] */ +#define DSCP_AF11 10 /* 001010, [RFC2597] */ +#define DSCP_AF12 12 /* 001100, [RFC2597] */ +#define DSCP_AF13 14 /* 001110, [RFC2597] */ +#define DSCP_AF21 18 /* 010010, [RFC2597] */ +#define DSCP_AF22 20 /* 010100, [RFC2597] */ +#define DSCP_AF23 22 /* 010110, [RFC2597] */ +#define DSCP_AF31 26 /* 011010, [RFC2597] */ +#define DSCP_AF32 28 /* 011100, [RFC2597] */ +#define DSCP_AF33 30 /* 011110, [RFC2597] */ +#define DSCP_AF41 34 /* 100010, [RFC2597] */ +#define DSCP_AF42 36 /* 100100, [RFC2597] */ +#define DSCP_AF43 38 /* 100110, [RFC2597] */ +#define DSCP_EF 46 /* 101110, [RFC3246] */ +#define DSCP_VOICE_ADMIT 44 /* 101100, [RFC5865] */ + +/* Pool 3: Standardized assignments, previously available for experimental/local + * use + */ +#define DSCP_LE 1 /* 000001, [RFC8622] */ + +#define DSCP_MAX 64 + +#endif /* __DSCP_H__ */ diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index df6622a5fe98..b4a55d2d5e71 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -76,7 +76,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, */ static inline void dst_cache_reset(struct dst_cache *dst_cache) { - dst_cache->reset_ts = jiffies; + WRITE_ONCE(dst_cache->reset_ts, jiffies); } /** diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 1b7fae4c6b24..4160731dcb6e 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -198,7 +198,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -215,7 +215,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -230,7 +230,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -243,7 +243,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; - info->key.tun_flags = flags; + ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; @@ -259,7 +259,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { diff --git a/include/net/espintcp.h b/include/net/espintcp.h index 0335bbd76552..c70efd704b6d 100644 --- a/include/net/espintcp.h +++ b/include/net/espintcp.h @@ -32,7 +32,7 @@ struct espintcp_ctx { static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* RCU is only needed for diag */ return (__force void *)icsk->icsk_ulp_data; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1a7131d6cb0e..9ab376d1a677 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -97,7 +97,7 @@ struct flow_dissector_key_enc_opts { * here but seems difficult to #include */ u8 len; - __be16 dst_opt_type; + u32 dst_opt_type; }; struct flow_dissector_key_keyid { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 314087a5e181..ec9f80509f60 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -345,7 +345,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * - * Returns true if exactly one action is present. + * Return: true if exactly one action is present. */ static inline bool flow_offload_has_one_action(const struct flow_action *action) { @@ -449,6 +449,61 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule, return dissector_uses_key(rule->match.dissector, key); } +/** + * flow_rule_is_supp_control_flags() - check for supported control flags + * @supp_flags: control flags supported by driver + * @ctrl_flags: control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if only supported control flags are set, false otherwise. + */ +static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags, + const u32 ctrl_flags, + struct netlink_ext_ack *extack) +{ + if (likely((ctrl_flags & ~supp_flags) == 0)) + return true; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported match on control.flags %#x", + ctrl_flags); + + return false; +} + +/** + * flow_rule_has_control_flags() - check for presence of any control flags + * @ctrl_flags: control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if control flags are set, false otherwise. + */ +static inline bool flow_rule_has_control_flags(const u32 ctrl_flags, + struct netlink_ext_ack *extack) +{ + return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack); +} + +/** + * flow_rule_match_has_control_flags() - match and check for any control flags + * @rule: The flow_rule under evaluation. + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if control flags are set, false otherwise. + */ +static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + struct flow_match_control match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) + return false; + + flow_rule_match_control(rule, &match); + + return flow_rule_has_control_flags(match.mask->flags, extack); +} + struct flow_stats { u64 pkts; u64 bytes; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ece6e5a3ea8..9ab49bfeae78 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -2,12 +2,20 @@ #ifndef __NET_GENERIC_NETLINK_H #define __NET_GENERIC_NETLINK_H -#include <linux/genetlink.h> +#include <linux/net.h> #include <net/netlink.h> #include <net/net_namespace.h> +#include <uapi/linux/genetlink.h> #define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) +/* Non-parallel generic netlink requests are serialized by a global lock. */ +void genl_lock(void); +void genl_unlock(void); + +#define MODULE_ALIAS_GENL_FAMILY(family) \ + MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) + /* Binding to multicast group requires %CAP_NET_ADMIN */ #define GENL_MCAST_CAP_NET_ADMIN BIT(0) /* Binding to multicast group requires %CAP_SYS_ADMIN */ diff --git a/include/net/gre.h b/include/net/gre.h index 4e209708b754..ccd293203284 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -49,67 +49,61 @@ static inline bool netif_is_ip6gretap(const struct net_device *dev) !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); } -static inline int gre_calc_hlen(__be16 o_flags) +static inline int gre_calc_hlen(const unsigned long *o_flags) { int addend = 4; - if (o_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, o_flags)) addend += 4; - if (o_flags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, o_flags)) addend += 4; - if (o_flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, o_flags)) addend += 4; return addend; } -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +static inline void gre_flags_to_tnl_flags(unsigned long *dst, __be16 flags) { - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; + IP_TUNNEL_DECLARE_FLAGS(res) = { }; + + __assign_bit(IP_TUNNEL_CSUM_BIT, res, flags & GRE_CSUM); + __assign_bit(IP_TUNNEL_ROUTING_BIT, res, flags & GRE_ROUTING); + __assign_bit(IP_TUNNEL_KEY_BIT, res, flags & GRE_KEY); + __assign_bit(IP_TUNNEL_SEQ_BIT, res, flags & GRE_SEQ); + __assign_bit(IP_TUNNEL_STRICT_BIT, res, flags & GRE_STRICT); + __assign_bit(IP_TUNNEL_REC_BIT, res, flags & GRE_REC); + __assign_bit(IP_TUNNEL_VERSION_BIT, res, flags & GRE_VERSION); + + ip_tunnel_flags_copy(dst, res); } -static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) +static inline __be16 gre_tnl_flags_to_gre_flags(const unsigned long *tflags) { __be16 flags = 0; - if (tflags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, tflags)) flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) + if (test_bit(IP_TUNNEL_ROUTING_BIT, tflags)) flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, tflags)) flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, tflags)) flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) + if (test_bit(IP_TUNNEL_STRICT_BIT, tflags)) flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) + if (test_bit(IP_TUNNEL_REC_BIT, tflags)) flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) + if (test_bit(IP_TUNNEL_VERSION_BIT, tflags)) flags |= GRE_VERSION; return flags; } static inline void gre_build_header(struct sk_buff *skb, int hdr_len, - __be16 flags, __be16 proto, + const unsigned long *flags, __be16 proto, __be32 key, __be32 seq) { + IP_TUNNEL_DECLARE_FLAGS(cond) = { }; struct gre_base_hdr *greh; skb_push(skb, hdr_len); @@ -120,18 +114,22 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, greh->flags = gre_tnl_flags_to_gre_flags(flags); greh->protocol = proto; - if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __set_bit(IP_TUNNEL_KEY_BIT, cond); + __set_bit(IP_TUNNEL_CSUM_BIT, cond); + __set_bit(IP_TUNNEL_SEQ_BIT, cond); + + if (ip_tunnel_flags_intersect(flags, cond)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (flags & TUNNEL_SEQ) { + if (test_bit(IP_TUNNEL_SEQ_BIT, flags)) { *ptr = seq; ptr--; } - if (flags & TUNNEL_KEY) { + if (test_bit(IP_TUNNEL_KEY_BIT, flags)) { *ptr = key; ptr--; } - if (flags & TUNNEL_CSUM && + if (test_bit(IP_TUNNEL_CSUM_BIT, flags) && !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; diff --git a/include/net/gro.h b/include/net/gro.h index 50f1e403dbbb..b9b58c1f8d19 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -36,15 +36,14 @@ struct napi_gro_cb { /* This is non-zero if the packet cannot be merged with the new skb. */ u16 flush; - /* Save the IP ID here and check when we get to the transport layer */ - u16 flush_id; - /* Number of segments aggregated. */ u16 count; /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ u16 proto; + u16 pad; + /* Used in napi_gro_cb::free */ #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 @@ -75,8 +74,8 @@ struct napi_gro_cb { /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; + /* Used to determine if ipid_offset can be ignored */ + u8 ip_fixedid:1; /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; @@ -87,6 +86,15 @@ struct napi_gro_cb { /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; + + /* L3 offsets */ + union { + struct { + u16 network_offset; + u16 inner_network_offset; + }; + u16 network_offsets[2]; + }; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) @@ -172,12 +180,17 @@ static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, return ptr; } +static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) +{ + return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; +} + static inline void *skb_gro_network_header(const struct sk_buff *skb) { if (skb_gro_may_pull(skb, skb_gro_offset(skb))) - return skb_gro_header_fast(skb, skb_network_offset(skb)); + return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); - return skb_network_header(skb); + return skb->data + skb_gro_receive_network_offset(skb); } static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, @@ -428,7 +441,71 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, skb_gro_len(skb), proto, 0)); } +static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, + struct sk_buff *p, bool outer) +{ + const u32 id = ntohl(*(__be32 *)&iph->id); + const u32 id2 = ntohl(*(__be32 *)&iph2->id); + const u16 ipid_offset = (id >> 16) - (id2 >> 16); + const u16 count = NAPI_GRO_CB(p)->count; + const u32 df = id & IP_DF; + int flush; + + /* All fields must match except length and checksum. */ + flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); + + if (flush | (outer && df)) + return flush; + + /* When we receive our second frame we can make a decision on if we + * continue this flow as an atomic flow with a fixed ID or if we use + * an incrementing ID. + */ + if (count == 1 && df && !ipid_offset) + NAPI_GRO_CB(p)->ip_fixedid = true; + + return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); +} + +static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) +{ + /* <Version:4><Traffic_Class:8><Flow_Label:20> */ + __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; + + /* Flush if Traffic Class fields are different. */ + return !!((first_word & htonl(0x0FF00000)) | + (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); +} + +static inline int __gro_receive_network_flush(const void *th, const void *th2, + struct sk_buff *p, const u16 diff, + bool outer) +{ + const void *nh = th - diff; + const void *nh2 = th2 - diff; + + if (((struct iphdr *)nh)->version == 6) + return ipv6_gro_flush(nh, nh2); + else + return inet_gro_flush(nh, nh2, p, outer); +} + +static inline int gro_receive_network_flush(const void *th, const void *th2, + struct sk_buff *p) +{ + const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; + int off = skb_transport_offset(p); + int flush; + + flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); + if (encap_mark) + flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); + + return flush; +} + int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ static inline void gro_normal_list(struct napi_struct *napi) diff --git a/include/net/gtp.h b/include/net/gtp.h index 2a503f035d18..c0253c8702d0 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -78,4 +78,9 @@ static inline bool netif_is_gtp(const struct net_device *dev) #define GTP1_F_EXTHDR 0x04 #define GTP1_F_MASK 0x07 +struct gtp_ext_hdr { + __u8 len; + __u8 data[]; +}; + #endif diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 003667a1efd6..30e9570beb2a 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -38,6 +38,9 @@ struct net_hotdata { int max_backlog; int dev_tx_weight; int dev_rx_weight; + int sysctl_max_skb_frags; + int sysctl_skb_defer_max; + int sysctl_mem_pcpu_rsv; }; #define inet_ehash_secret net_hotdata.tcp_protocol.secret diff --git a/include/net/ieee8021q.h b/include/net/ieee8021q.h new file mode 100644 index 000000000000..8bfe903dd3d0 --- /dev/null +++ b/include/net/ieee8021q.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> */ + +#ifndef _NET_IEEE8021Q_H +#define _NET_IEEE8021Q_H + +#include <linux/errno.h> + +/** + * enum ieee8021q_traffic_type - 802.1Q traffic type priority values (802.1Q-2022) + * + * @IEEE8021Q_TT_BK: Background + * @IEEE8021Q_TT_BE: Best Effort (default). According to 802.1Q-2022, BE is 0 + * but has higher priority than BK which is 1. + * @IEEE8021Q_TT_EE: Excellent Effort + * @IEEE8021Q_TT_CA: Critical Applications + * @IEEE8021Q_TT_VI: Video, < 100 ms latency and jitter + * @IEEE8021Q_TT_VO: Voice, < 10 ms latency and jitter + * @IEEE8021Q_TT_IC: Internetwork Control + * @IEEE8021Q_TT_NC: Network Control + */ +enum ieee8021q_traffic_type { + IEEE8021Q_TT_BK = 0, + IEEE8021Q_TT_BE = 1, + IEEE8021Q_TT_EE = 2, + IEEE8021Q_TT_CA = 3, + IEEE8021Q_TT_VI = 4, + IEEE8021Q_TT_VO = 5, + IEEE8021Q_TT_IC = 6, + IEEE8021Q_TT_NC = 7, + + /* private: */ + IEEE8021Q_TT_MAX, +}; + +#define SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp) ((dscp >> 3) & 0x7) + +#if IS_ENABLED(CONFIG_NET_IEEE8021Q_HELPERS) + +int ietf_dscp_to_ieee8021q_tt(u8 dscp); +int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues); + +#else + +static inline int ietf_dscp_to_ieee8021q_tt(u8 dscp) +{ + return -EOPNOTSUPP; +} + +static inline int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, + unsigned int num_queues) +{ + return -EOPNOTSUPP; +} + +#endif +#endif /* _NET_IEEE8021Q_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9ab4bf704e86..20e7b0c0b3d1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,10 +147,7 @@ struct inet_connection_sock { #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ -static inline struct inet_connection_sock *inet_csk(const struct sock *sk) -{ - return (struct inet_connection_sock *)sk; -} +#define inet_csk(ptr) container_of_const(ptr, struct inet_connection_sock, icsk_inet.sk) static inline void *inet_csk_ca(const struct sock *sk) { @@ -175,6 +172,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { @@ -283,7 +281,7 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk) static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { - return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; + return inet_csk_reqsk_queue_len(sk) >= READ_ONCE(sk->sk_max_ack_backlog); } bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f28da08a37b4..2a536eea9424 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -111,7 +111,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); +void inet_twsk_purge(struct inet_hashinfo *hashinfo); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) diff --git a/include/net/ip.h b/include/net/ip.h index 25cb688bdc62..6d735e00d3f3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -423,7 +423,7 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_mtu_locked(const struct dst_entry *dst) { - const struct rtable *rt = (const struct rtable *)dst; + const struct rtable *rt = dst_rtable(dst); return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); } @@ -461,7 +461,7 @@ static inline bool ip_sk_ignore_df(const struct sock *sk) static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { - const struct rtable *rt = container_of(dst, struct rtable, dst); + const struct rtable *rt = dst_rtable(dst); struct net *net = dev_net(dst->dev); unsigned int mtu; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 323c94f1845b..6cb867ce4878 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -234,9 +234,11 @@ struct fib6_result { for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) -static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) +#define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst) + +static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst) { - return ((struct rt6_info *)dst)->rt6i_idev; + return dst_rt6_info(dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) @@ -338,7 +340,7 @@ static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); - call_rcu(&f6i->rcu, fib6_info_destroy_rcu); + call_rcu_hurry(&f6i->rcu, fib6_info_destroy_rcu); } } diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a30c6aa9e5cf..a18ed24fed94 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -210,12 +210,11 @@ void rt6_uncached_list_del(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt6 = NULL; if (dst) - rt6 = container_of(dst, struct rt6_info, dst); + return dst_rt6_info(dst); - return rt6; + return NULL; } /* @@ -227,7 +226,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, { struct ipv6_pinfo *np = inet6_sk(sk); - np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); + np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES @@ -240,7 +239,7 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + const struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); return rt->rt6i_flags & RTF_LOCAL; } @@ -248,7 +247,7 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { - struct rt6_info *rt = (struct rt6_info *)dst; + const struct rt6_info *rt = dst_rt6_info(dst); return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 74b369bddf49..399592405c72 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -30,8 +30,8 @@ struct __ip6_tnl_parm { struct in6_addr laddr; /* local tunnel end-point address */ struct in6_addr raddr; /* remote tunnel end-point address */ - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 5cd64bb2104d..9a6a08ec7713 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -36,6 +36,24 @@ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) +#define __ipt_flag_op(op, ...) \ + op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) + +#define IP_TUNNEL_DECLARE_FLAGS(...) \ + __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) + +#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) +#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) +#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) +#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) + +#define ip_tunnel_flags_empty(...) \ + __ipt_flag_op(bitmap_empty, __VA_ARGS__) +#define ip_tunnel_flags_intersect(...) \ + __ipt_flag_op(bitmap_intersects, __VA_ARGS__) +#define ip_tunnel_flags_subset(...) \ + __ipt_flag_op(bitmap_subset, __VA_ARGS__) + struct ip_tunnel_key { __be64 tun_id; union { @@ -48,11 +66,11 @@ struct ip_tunnel_key { struct in6_addr dst; } ipv6; } u; - __be16 tun_flags; - u8 tos; /* TOS for IPv4, TC for IPv6 */ - u8 ttl; /* TTL for IPv4, HL for IPv6 */ + IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; @@ -110,6 +128,17 @@ struct ip_tunnel_prl_entry { struct metadata_dst; +/* Kernel-side variant of ip_tunnel_parm */ +struct ip_tunnel_parm_kern { + char name[IFNAMSIZ]; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); + __be32 i_key; + __be32 o_key; + int link; + struct iphdr iph; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -136,7 +165,7 @@ struct ip_tunnel { struct dst_cache dst_cache; - struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ @@ -157,7 +186,7 @@ struct ip_tunnel { }; struct tnl_ptk_info { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; @@ -179,11 +208,80 @@ struct ip_tunnel_net { int type; }; +static inline void ip_tunnel_set_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + +static inline void ip_tunnel_clear_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + __ipt_flag_op(bitmap_andnot, flags, flags, present); +} + +static inline bool ip_tunnel_is_options_present(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + return ip_tunnel_flags_intersect(flags, present); +} + +static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(supp) = { }; + + bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); + __set_bit(IP_TUNNEL_VTI_BIT, supp); + + return ip_tunnel_flags_subset(flags, supp); +} + +static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) +{ + ip_tunnel_flags_zero(dst); + + bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); + __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); +} + +static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) +{ + __be16 ret; + + ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); + if (test_bit(IP_TUNNEL_VTI_BIT, flags)) + ret |= VTI_ISVTI; + + return ret; +} + static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, + const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; @@ -193,7 +291,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, key->tos = tos; key->ttl = ttl; key->label = label; - key->tun_flags = tun_flags; + ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. @@ -214,12 +312,8 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb, { if (skb->mark) return false; - if (!info) - return true; - if (info->key.tun_flags & TUNNEL_NOCACHE) - return false; - return true; + return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@ -291,14 +385,18 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd); +bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, + const void __user *data); +bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); @@ -307,16 +405,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); void ip_tunnel_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms); + struct ip_tunnel_parm_kern *parms); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); @@ -361,6 +459,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) return pskb_network_may_pull(skb, nhlen); } +/* Variant of pskb_inet_may_pull(). + */ +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +{ + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; +#endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; +} + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; @@ -514,12 +645,13 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); - info->key.tun_flags |= flags; + ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, + flags); } } @@ -563,7 +695,7 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = 0; } diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 8b2055d64a6b..5cd7871127c9 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -30,6 +30,7 @@ #include <linux/types.h> #include <linux/slab.h> +#include <asm/dma-types.h> #include <asm/debug.h> /* @@ -76,7 +77,7 @@ * and iucv_message_reply if IUCV_IPBUFLST or IUCV_IPANSLST are used. */ struct iucv_array { - u32 address; + dma32_t address; u32 length; } __attribute__ ((aligned (8))); diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h new file mode 100644 index 000000000000..f29ea3e34c6c --- /dev/null +++ b/include/net/libeth/rx.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_RX_H +#define __LIBETH_RX_H + +#include <linux/if_vlan.h> + +#include <net/page_pool/helpers.h> +#include <net/xdp.h> + +/* Rx buffer management */ + +/* Space reserved in front of each frame */ +#define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +/* Maximum headroom for worst-case calculations */ +#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM +/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ +#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + +/* Always use order-0 pages */ +#define LIBETH_RX_PAGE_ORDER 0 +/* Pick a sane buffer stride and align to a cacheline boundary */ +#define LIBETH_RX_BUF_STRIDE SKB_DATA_ALIGN(128) +/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */ +#define LIBETH_RX_PAGE_LEN(hr) \ + ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER), \ + LIBETH_RX_BUF_STRIDE) + +/** + * struct libeth_fqe - structure representing an Rx buffer (fill queue element) + * @page: page holding the buffer + * @offset: offset from the page start (to the headroom) + * @truesize: total space occupied by the buffer (w/ headroom and tailroom) + * + * Depending on the MTU, API switches between one-page-per-frame and shared + * page model (to conserve memory on bigger-page platforms). In case of the + * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```. + */ +struct libeth_fqe { + struct page *page; + u32 offset; + u32 truesize; +} __aligned_largest; + +/** + * struct libeth_fq - structure representing a buffer (fill) queue + * @fp: hotpath part of the structure + * @pp: &page_pool for buffer management + * @fqes: array of Rx buffers + * @truesize: size to allocate per buffer, w/overhead + * @count: number of descriptors/buffers the queue has + * @buf_len: HW-writeable length per each buffer + * @nid: ID of the closest NUMA node with memory + */ +struct libeth_fq { + struct_group_tagged(libeth_fq_fp, fp, + struct page_pool *pp; + struct libeth_fqe *fqes; + + u32 truesize; + u32 count; + ); + + /* Cold fields */ + u32 buf_len; + int nid; +}; + +int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi); +void libeth_rx_fq_destroy(struct libeth_fq *fq); + +/** + * libeth_rx_alloc - allocate a new Rx buffer + * @fq: fill queue to allocate for + * @i: index of the buffer within the queue + * + * Return: DMA address to be passed to HW for Rx on successful allocation, + * ```DMA_MAPPING_ERROR``` otherwise. + */ +static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i) +{ + struct libeth_fqe *buf = &fq->fqes[i]; + + buf->truesize = fq->truesize; + buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize); + if (unlikely(!buf->page)) + return DMA_MAPPING_ERROR; + + return page_pool_get_dma_addr(buf->page) + buf->offset + + fq->pp->p.offset; +} + +void libeth_rx_recycle_slow(struct page *page); + +/** + * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA + * @fqe: buffer to process + * @len: frame length from the descriptor + * + * Process the buffer after it's written by HW. The regular path is to + * synchronize DMA for CPU, but in case of no data it will be immediately + * recycled back to its PP. + * + * Return: true when there's data to process, false otherwise. + */ +static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe, + u32 len) +{ + struct page *page = fqe->page; + + /* Very rare, but possible case. The most common reason: + * the last fragment contained FCS only, which was then + * stripped by the HW. + */ + if (unlikely(!len)) { + libeth_rx_recycle_slow(page); + return false; + } + + page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len); + + return true; +} + +/* Converting abstract packet type numbers into a software structure with + * the packet parameters to do O(1) lookup on Rx. + */ + +enum { + LIBETH_RX_PT_OUTER_L2 = 0U, + LIBETH_RX_PT_OUTER_IPV4, + LIBETH_RX_PT_OUTER_IPV6, +}; + +enum { + LIBETH_RX_PT_NOT_FRAG = 0U, + LIBETH_RX_PT_FRAG, +}; + +enum { + LIBETH_RX_PT_TUNNEL_IP_NONE = 0U, + LIBETH_RX_PT_TUNNEL_IP_IP, + LIBETH_RX_PT_TUNNEL_IP_GRENAT, + LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC, + LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN, +}; + +enum { + LIBETH_RX_PT_TUNNEL_END_NONE = 0U, + LIBETH_RX_PT_TUNNEL_END_IPV4, + LIBETH_RX_PT_TUNNEL_END_IPV6, +}; + +enum { + LIBETH_RX_PT_INNER_NONE = 0U, + LIBETH_RX_PT_INNER_UDP, + LIBETH_RX_PT_INNER_TCP, + LIBETH_RX_PT_INNER_SCTP, + LIBETH_RX_PT_INNER_ICMP, + LIBETH_RX_PT_INNER_TIMESYNC, +}; + +#define LIBETH_RX_PT_PAYLOAD_NONE PKT_HASH_TYPE_NONE +#define LIBETH_RX_PT_PAYLOAD_L2 PKT_HASH_TYPE_L2 +#define LIBETH_RX_PT_PAYLOAD_L3 PKT_HASH_TYPE_L3 +#define LIBETH_RX_PT_PAYLOAD_L4 PKT_HASH_TYPE_L4 + +struct libeth_rx_pt { + u32 outer_ip:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:3; + enum pkt_hash_types payload_layer:2; + + u32 pad:2; + enum xdp_rss_hash_type hash_type:16; +}; + +void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt); + +/** + * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure + * @pt: packet type params + * + * Wrapper to compile out the IPv6 code from the drivers when not supported + * by the kernel. + * + * Return: @pt.outer_ip or stub for IPv6 when not compiled-in. + */ +static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt) +{ +#if !IS_ENABLED(CONFIG_IPV6) + switch (pt.outer_ip) { + case LIBETH_RX_PT_OUTER_IPV4: + return LIBETH_RX_PT_OUTER_IPV4; + default: + return LIBETH_RX_PT_OUTER_L2; + } +#else + return pt.outer_ip; +#endif +} + +/* libeth_has_*() can be used to quickly check whether the HW metadata is + * available to avoid further expensive processing such as descriptor reads. + * They already check for the corresponding netdev feature to be enabled, + * thus can be used as drop-in replacements. + */ + +static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev, + struct libeth_rx_pt pt) +{ + /* Non-zero _INNER* is only possible when _OUTER_IPV* is set, + * it is enough to check only for the L4 type. + */ + return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE && + (dev->features & NETIF_F_RXCSUM)); +} + +static inline bool libeth_rx_pt_has_hash(const struct net_device *dev, + struct libeth_rx_pt pt) +{ + return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE && + (dev->features & NETIF_F_RXHASH)); +} + +/** + * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT + * @skb: skb to fill the hash in + * @hash: 32-bit hash value from the descriptor + * @pt: packet type + */ +static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash, + struct libeth_rx_pt pt) +{ + skb_set_hash(skb, hash, pt.payload_layer); +} + +#endif /* __LIBETH_RX_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 353488ab94a2..cafc664ee531 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -361,7 +361,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. - * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed + * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -953,6 +953,8 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment + * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted + * due to scanning, not in normal operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. This really is * only relevant for frames that do not have data present, and is @@ -973,6 +975,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), + IEEE80211_TX_CTRL_SCAN_TX = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; @@ -1921,10 +1924,12 @@ enum ieee80211_neg_ttlm_res { * @active_links: The bitmap of active links, or 0 for non-MLO. * The driver shouldn't change this directly, but use the * API calls meant for that purpose. - * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO. - * Must be a subset of valid_links. + * @dormant_links: subset of the valid links that are disabled/suspended + * due to advertised or negotiated TTLM respectively. + * 0 for non-MLO. * @suspended_links: subset of dormant_links representing links that are - * suspended. + * suspended due to negotiated TTLM, and could be activated in the + * future by tearing down the TTLM negotiation. * 0 for non-MLO. * @neg_ttlm: negotiated TID to link mapping info. * see &struct ieee80211_neg_ttlm. @@ -2049,7 +2054,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * This can be used by mac80211 drivers with direct cfg80211 APIs * (like the vendor commands) that get a wdev. * - * Note that this function may return %NULL if the given wdev isn't + * Return: pointer to the wdev, or %NULL if the given wdev isn't * associated with a vif that the driver knows about (e.g. monitor * or AP_VLAN interfaces.) */ @@ -2062,6 +2067,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * This can be used by mac80211 drivers with direct cfg80211 APIs * (like the vendor commands) that needs to get the wdev for a vif. * This can also be useful to get the netdev associated to a vif. + * + * Return: pointer to the wdev */ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); @@ -2120,8 +2127,8 @@ static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) * @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key. * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation. * @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver - * for a AES_CMAC key to indicate that it requires sequence number - * generation only + * for a AES_CMAC or a AES_GMAC key to indicate that it requires sequence + * number generation only * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key * (set by mac80211 from the sta->spp_amsdu flag) */ @@ -2777,6 +2784,8 @@ struct ieee80211_txq { * * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT * and connecting with a lower bandwidth instead + * @IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ: HW requires disabling puncturing in + * EHT in 5 GHz and connecting with a lower bandwidth instead * * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so * no need to stop queues. This really should be set by a driver that @@ -2841,6 +2850,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, IEEE80211_HW_DISALLOW_PUNCTURING, + IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ, IEEE80211_HW_HANDLES_QUIET_CSA, /* keep last, obviously */ @@ -5597,7 +5607,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id); * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @link_id: valid link_id during MLO or 0 for non-MLO * - * This function returns whether the countdown reached zero. + * Return: %true if the countdown reached 1, %false otherwise */ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif, unsigned int link_id); @@ -5605,12 +5615,13 @@ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif, /** * ieee80211_color_change_finish - notify mac80211 about color change * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * After a color change announcement was scheduled and the counter in this * announcement hits 1, this function must be called by the driver to * notify mac80211 that the color can be changed */ -void ieee80211_color_change_finish(struct ieee80211_vif *vif); +void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id); /** * ieee80211_proberesp_get - retrieve a Probe Response template @@ -5942,8 +5953,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); * key(s) will be available. These will be needed by mac80211 for proper * RX processing, so this function allows setting them. * - * The function returns the newly allocated key structure, which will - * have similar contents to the passed key configuration but point to + * Return: the newly allocated key structure, which will have + * similar contents to the passed key configuration but point to * mac80211-owned memory. In case of errors, the function returns an * ERR_PTR(), use IS_ERR() etc. * @@ -6342,6 +6353,8 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, * may be %NULL if the link ID is not needed * * Obtain the STA by link address, must use RCU protection. + * + * Return: pointer to STA if found, otherwise %NULL. */ struct ieee80211_sta * ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, @@ -6471,8 +6484,8 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, * @hw: pointer obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface * - * Return true if the AQL's airtime limit has not been reached and the txq can - * continue to send more packets to the device. Otherwise return false. + * Return: %true if the AQL's airtime limit has not been reached and the txq can + * continue to send more packets to the device. Otherwise return %false. */ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq); @@ -6975,6 +6988,8 @@ bool rate_usable_index_exists(struct ieee80211_supported_band *sband, * @hw: pointer as obtained from ieee80211_alloc_hw() * @pubsta: &struct ieee80211_sta pointer to the target destination. * @rates: new tx rate set to be used for this station. + * + * Return: 0 on success. An error code otherwise. */ int rate_control_set_rates(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, @@ -7135,6 +7150,8 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, * @band: the band to transmit on * @sta: optional pointer to get the station to send the frame to * + * Return: %true if the skb was prepared, %false otherwise + * * Note: must be called under RCU lock */ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, @@ -7151,6 +7168,8 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, * * @skb: packet injected by userspace * @dev: the &struct device of this 802.11 device + * + * Return: %true if the radiotap header was parsed, %false otherwise */ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, struct net_device *dev); @@ -7260,7 +7279,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid); * @txq: pointer obtained from station or virtual interface, or from * ieee80211_next_txq() * - * Returns the skb if successful, %NULL if no frame was available. + * Return: the skb if successful, %NULL if no frame was available. * * Note that this must be called in an rcu_read_lock() critical section, * which can only be released after the SKB was handled. Some pointers in @@ -7286,6 +7305,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface, or from * ieee80211_next_txq() + * + * Return: the skb if successful, %NULL if no frame was available. */ static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, struct ieee80211_txq *txq) @@ -7317,7 +7338,7 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to return packets from. * - * Returns the next txq if successful, %NULL if no queue is eligible. If a txq + * Return: the next txq if successful, %NULL if no queue is eligible. If a txq * is returned, it should be returned with ieee80211_return_txq() after the * driver has finished scheduling it. */ @@ -7400,6 +7421,8 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface + * + * Return: %true if transmission is allowed, %false otherwise */ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq); @@ -7460,6 +7483,8 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, * @status: &struct ieee80211_rx_status containing the transmission rate * information. * @len: frame length in bytes + * + * Return: the airtime estimate */ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw, struct ieee80211_rx_status *status, @@ -7474,23 +7499,13 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @info: &struct ieee80211_tx_info of the frame. * @len: frame length in bytes + * + * Return: the airtime estimate */ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, struct ieee80211_tx_info *info, int len); /** - * ieee80211_set_hw_80211_encap - enable hardware encapsulation offloading. - * - * This function is used to notify mac80211 that a vif can be passed raw 802.3 - * frames. The driver needs to then handle the 802.11 encapsulation inside the - * hardware or firmware. - * - * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @enable: indicate if the feature should be turned on or off - */ -bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable); - -/** * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template. * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. @@ -7519,6 +7534,7 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, /** * ieee80211_obss_color_collision_notify - notify userland about a BSS color * collision. + * @link_id: valid link_id during MLO or 0 for non-MLO * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is @@ -7526,7 +7542,7 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, */ void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap); + u64 color_bitmap, u8 link_id); /** * ieee80211_is_tx_data - check if frame is a data frame @@ -7535,6 +7551,8 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, * hardware encapsulation enabled are data frames. * * @skb: the frame to be transmitted. + * + * Return: %true if @skb is a data frame, %false otherwise */ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) { @@ -7570,6 +7588,8 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * - change_sta_links(0x10) for each affected STA (the AP) * - assign_vif_chanctx(link_id=4) * - change_vif_links(0x10) + * + * Return: 0 on success. An error code otherwise. */ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); @@ -7586,6 +7606,15 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); void ieee80211_set_active_links_async(struct ieee80211_vif *vif, u16 active_links); +/** + * ieee80211_send_teardown_neg_ttlm - tear down a negotiated TTLM request + * @vif: the interface on which the tear down request should be sent. + * + * This function can be used to tear down a previously accepted negotiated + * TTLM request. + */ +void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif); + /* for older drivers - let's not document these ... */ int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); diff --git a/include/net/macsec.h b/include/net/macsec.h index dbd22180cc5c..de216cbc6b05 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -321,6 +321,7 @@ struct macsec_context { * for the TX tag * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the * TX tag + * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst */ struct macsec_ops { /* Device wide */ @@ -352,6 +353,7 @@ struct macsec_ops { struct sk_buff *skb); unsigned int needed_headroom; unsigned int needed_tailroom; + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 76147feb0d10..561f6719fb4e 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -39,7 +39,6 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 #define RX_BUFFERS_PER_QUEUE 512 -#define MANA_RX_DATA_ALIGN 64 #define MAX_SEND_BUFFERS_PER_QUEUE 256 @@ -671,6 +670,7 @@ struct mana_cfg_rx_steer_req_v2 { u8 hashkey[MANA_HASH_KEY_SIZE]; u8 cqe_coalescing_enable; u8 reserved2[7]; + mana_handle_t indir_tab[] __counted_by(num_indir_entries); }; /* HW DATA */ struct mana_cfg_rx_steer_resp { diff --git a/include/net/mptcp.h b/include/net/mptcp.h index fb996124b3d5..0bc4ab03f487 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -97,6 +97,9 @@ struct mptcp_out_options { }; #define MPTCP_SCHED_NAME_MAX 16 +#define MPTCP_SCHED_MAX 128 +#define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX) + #define MPTCP_SUBFLOWS_MAX 8 struct mptcp_sched_data { diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 1ec408585373..a8a7e48dfa6c 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -9,11 +9,41 @@ struct netdev_queue_stats_rx { u64 bytes; u64 packets; u64 alloc_fail; + + u64 hw_drops; + u64 hw_drop_overruns; + + u64 csum_unnecessary; + u64 csum_none; + u64 csum_bad; + + u64 hw_gro_packets; + u64 hw_gro_bytes; + u64 hw_gro_wire_packets; + u64 hw_gro_wire_bytes; + + u64 hw_drop_ratelimits; }; struct netdev_queue_stats_tx { u64 bytes; u64 packets; + + u64 hw_drops; + u64 hw_drop_errors; + + u64 csum_none; + u64 needs_csum; + + u64 hw_gso_packets; + u64 hw_gso_bytes; + u64 hw_gso_wire_packets; + u64 hw_gso_wire_bytes; + + u64 hw_drop_ratelimits; + + u64 stop; + u64 wake; }; /** @@ -61,6 +91,37 @@ struct netdev_stat_ops { }; /** + * struct netdev_queue_mgmt_ops - netdev ops for queue management + * + * @ndo_queue_mem_size: Size of the struct that describes a queue's memory. + * + * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index. + * The new memory is written at the specified address. + * + * @ndo_queue_mem_free: Free memory from an RX queue. + * + * @ndo_queue_start: Start an RX queue with the specified memory and at the + * specified index. + * + * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped + * queue's memory is written at the specified address. + */ +struct netdev_queue_mgmt_ops { + size_t ndo_queue_mem_size; + int (*ndo_queue_mem_alloc)(struct net_device *dev, + void *per_queue_mem, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *per_queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + void *per_queue_mem, + int idx); + int (*ndo_queue_stop)(struct net_device *dev, + void *per_queue_mem, + int idx); +}; + +/** * DOC: Lockless queue stopping / waking helpers. * * The netif_txq_maybe_stop() and __netif_txq_completed_wake() diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index a763dd327c6e..9abb7ee40d72 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) { __be16 proto; @@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) return 0; } +static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) +{ + if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + return false; + + *inner_proto = __nf_flow_pppoe_proto(skb); + + return true; +} + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e27c28b612e4..2796153b03da 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) return (void *)priv; } + +/** + * enum nft_iter_type - nftables set iterator type + * + * @NFT_ITER_READ: read-only iteration over set elements + * @NFT_ITER_UPDATE: iteration under mutex to update set element state + */ +enum nft_iter_type { + NFT_ITER_UNSPEC, + NFT_ITER_READ, + NFT_ITER_UPDATE, +}; + struct nft_set; struct nft_set_iter { u8 genmask; + enum nft_iter_type type:8; unsigned int count; unsigned int skip; int err; @@ -402,7 +416,7 @@ struct nft_expr_info; int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, struct nft_expr_info *info); -int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr, bool reset); @@ -921,7 +935,7 @@ struct nft_expr_ops { struct nft_regs *regs, const struct nft_pktinfo *pkt); int (*clone)(struct nft_expr *dst, - const struct nft_expr *src); + const struct nft_expr *src, gfp_t gfp); unsigned int size; int (*init)(const struct nft_ctx *ctx, diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f3ab0b8a4b18..2133ad723fc1 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -470,7 +470,8 @@ void netlbl_bitmap_setbit(unsigned char *bitmap, u32 bit, u8 state); int netlbl_enabled(void); int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr); + const struct netlbl_lsm_secattr *secattr, + bool sk_locked); void netlbl_sock_delattr(struct sock *sk); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); @@ -487,6 +488,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); void netlbl_skbuff_err(struct sk_buff *skb, u16 family, int error, int gateway); +bool netlbl_sk_lock_check(struct sock *sk); /* * LSM label mapping cache operations @@ -614,7 +616,8 @@ static inline int netlbl_enabled(void) } static inline int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { return -ENOSYS; } @@ -673,6 +676,11 @@ static inline struct audit_buffer *netlbl_audit_start(int type, { return NULL; } + +static inline bool netlbl_sk_lock_check(struct sock *sk) +{ + return true; +} #endif /* CONFIG_NETLABEL */ const struct netlbl_calipso_ops * diff --git a/include/net/netlink.h b/include/net/netlink.h index c19ff921b661..61cef3bd2d31 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -41,7 +41,8 @@ * nlmsg_get_pos() return current position in message * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction - * nlmsg_free() free a netlink message + * nlmsg_consume() free a netlink message (expected) + * nlmsg_free() free a netlink message (drop) * * Message Sending: * nlmsg_multicast() multicast message to several groups @@ -157,7 +158,11 @@ * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes + * nla_for_each_attr_type() loop over all attributes with the + * given type * nla_for_each_nested() loop over the nested attributes + * nla_for_each_nested_type() loop over the nested attributes with + * the given type *========================================================================= */ @@ -1078,7 +1083,7 @@ static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) } /** - * nlmsg_free - free a netlink message + * nlmsg_free - drop a netlink message * @skb: socket buffer of netlink message */ static inline void nlmsg_free(struct sk_buff *skb) @@ -1087,6 +1092,15 @@ static inline void nlmsg_free(struct sk_buff *skb) } /** + * nlmsg_consume - free a netlink message + * @skb: socket buffer of netlink message + */ +static inline void nlmsg_consume(struct sk_buff *skb) +{ + consume_skb(skb); +} + +/** * nlmsg_multicast_filtered - multicast a netlink message with filter function * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer @@ -2071,6 +2085,18 @@ static inline int nla_total_size_64bit(int payload) pos = nla_next(pos, &(rem))) /** + * nla_for_each_attr_type - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr_type(pos, type, head, len, rem) \ + nla_for_each_attr(pos, head, len, rem) \ + if (nla_type(pos) == type) + +/** * nla_for_each_nested - iterate over nested attributes * @pos: loop counter, set to current attribute * @nla: attribute containing the nested attributes @@ -2080,6 +2106,17 @@ static inline int nla_total_size_64bit(int payload) nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) /** + * nla_for_each_nested_type - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested_type(pos, type, nla, rem) \ + nla_for_each_nested(pos, nla, rem) \ + if (nla_type(pos) == type) + +/** * nla_is_last - Test if attribute is last in stream * @nla: attribute to test * @rem: bytes remaining in stream diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 7ca315ad500e..68463aebcc05 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -267,7 +267,7 @@ static inline bool nexthop_get(struct nexthop *nh) static inline void nexthop_put(struct nexthop *nh) { if (refcount_dec_and_test(&nh->refcnt)) - call_rcu(&nh->rcu, nexthop_free_rcu); + call_rcu_hurry(&nh->rcu, nexthop_free_rcu); } static inline bool nexthop_cmp(const struct nexthop *nh1, diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 1d397c1a0043..873631c79ab1 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -52,13 +52,15 @@ #ifndef _NET_PAGE_POOL_HELPERS_H #define _NET_PAGE_POOL_HELPERS_H +#include <linux/dma-mapping.h> + #include <net/page_pool/types.h> #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ int page_pool_ethtool_stats_get_count(void); u8 *page_pool_ethtool_stats_get_strings(u8 *data); -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); +u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats); bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats); @@ -73,7 +75,7 @@ static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) return data; } -static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { return data; } @@ -204,8 +206,8 @@ static inline void *page_pool_dev_alloc_va(struct page_pool *pool, * Get the stored dma direction. A driver might decide to store this locally * and avoid the extra cache line from page_pool to determine the direction. */ -static -inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *pool) { return pool->p.dma_dir; } @@ -370,7 +372,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, * Fetch the DMA address of the page. The page pool to which the page belongs * must had been created with PP_FLAG_DMA_MAP. */ -static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { dma_addr_t ret = page->dma_addr; @@ -395,6 +397,28 @@ static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) return false; } +/** + * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW + * @pool: &page_pool the @page belongs to + * @page: page to sync + * @offset: offset from page start to "hard" start if using PP frags + * @dma_sync_size: size of the data written to the page + * + * Can be used as a shorthand to sync Rx pages before accessing them in the + * driver. Caller must ensure the pool was created with ``PP_FLAG_DMA_MAP``. + * Note that this version performs DMA sync unconditionally, even if the + * associated PP doesn't perform sync-for-device. + */ +static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const struct page *page, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, + page_pool_get_dma_addr(page), + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 5e43a08d3231..a6ebed002216 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -213,7 +213,7 @@ struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem); + const struct xdp_mem_info *mem); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); #else @@ -223,7 +223,7 @@ static inline void page_pool_destroy(struct page_pool *pool) static inline void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem) + const struct xdp_mem_info *mem) { } diff --git a/include/net/pfcp.h b/include/net/pfcp.h new file mode 100644 index 000000000000..af14f970b80e --- /dev/null +++ b/include/net/pfcp.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PFCP_H_ +#define _PFCP_H_ + +#include <uapi/linux/if_ether.h> +#include <net/dst_metadata.h> +#include <linux/netdevice.h> +#include <uapi/linux/ipv6.h> +#include <net/udp_tunnel.h> +#include <uapi/linux/udp.h> +#include <uapi/linux/ip.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/bits.h> + +#define PFCP_PORT 8805 + +/* PFCP protocol header */ +struct pfcphdr { + u8 flags; + u8 message_type; + __be16 message_length; +}; + +/* PFCP header flags */ +#define PFCP_SEID_FLAG BIT(0) +#define PFCP_MP_FLAG BIT(1) + +#define PFCP_VERSION_MASK GENMASK(4, 0) + +#define PFCP_HLEN (sizeof(struct udphdr) + sizeof(struct pfcphdr)) + +/* PFCP node related messages */ +struct pfcphdr_node { + u8 seq_number[3]; + u8 reserved; +}; + +/* PFCP session related messages */ +struct pfcphdr_session { + __be64 seid; + u8 seq_number[3]; +#ifdef __LITTLE_ENDIAN_BITFIELD + u8 message_priority:4, + reserved:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + message_priprity:4; +#else +#error "Please fix <asm/byteorder>" +#endif +}; + +struct pfcp_metadata { + u8 type; + __be64 seid; +} __packed; + +enum { + PFCP_TYPE_NODE = 0, + PFCP_TYPE_SESSION = 1, +}; + +#define PFCP_HEADROOM (sizeof(struct iphdr) + sizeof(struct udphdr) + \ + sizeof(struct pfcphdr) + sizeof(struct ethhdr)) +#define PFCP6_HEADROOM (sizeof(struct ipv6hdr) + sizeof(struct udphdr) + \ + sizeof(struct pfcphdr) + sizeof(struct ethhdr)) + +static inline struct pfcphdr *pfcp_hdr(struct sk_buff *skb) +{ + return (struct pfcphdr *)(udp_hdr(skb) + 1); +} + +static inline struct pfcphdr_node *pfcp_hdr_node(struct sk_buff *skb) +{ + return (struct pfcphdr_node *)(pfcp_hdr(skb) + 1); +} + +static inline struct pfcphdr_session *pfcp_hdr_session(struct sk_buff *skb) +{ + return (struct pfcphdr_session *)(pfcp_hdr(skb) + 1); +} + +static inline bool netif_is_pfcp(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "pfcp"); +} + +#endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a4ee43f493bb..41297bd38dff 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -74,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block) return block && block->index; } +#ifdef CONFIG_NET_CLS_ACT +DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); + +static inline bool tcf_block_bypass_sw(struct tcf_block *block) +{ + return block && block->bypass_wanted; +} +#endif + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { WARN_ON(tcf_block_shared(block)); diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h new file mode 100644 index 000000000000..a6ab2f4f5e28 --- /dev/null +++ b/include/net/proto_memory.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PROTO_MEMORY_H +#define _PROTO_MEMORY_H + +#include <net/sock.h> +#include <net/hotdata.h> + +/* 1 MB per cpu, in page units */ +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) + +static inline bool sk_has_memory_pressure(const struct sock *sk) +{ + return sk->sk_prot->memory_pressure != NULL; +} + +static inline bool +proto_memory_pressure(const struct proto *prot) +{ + if (!prot->memory_pressure) + return false; + return !!READ_ONCE(*prot->memory_pressure); +} + +static inline bool sk_under_global_memory_pressure(const struct sock *sk) +{ + return proto_memory_pressure(sk->sk_prot); +} + +static inline bool sk_under_memory_pressure(const struct sock *sk) +{ + if (!sk->sk_prot->memory_pressure) + return false; + + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) + return true; + + return !!READ_ONCE(*sk->sk_prot->memory_pressure); +} + +static inline long +proto_memory_allocated(const struct proto *prot) +{ + return max(0L, atomic_long_read(prot->memory_allocated)); +} + +static inline long +sk_memory_allocated(const struct sock *sk) +{ + return proto_memory_allocated(sk->sk_prot); +} + +static inline void proto_memory_pcpu_drain(struct proto *proto) +{ + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); + + if (val) + atomic_long_add(val, proto->memory_allocated); +} + +static inline void +sk_memory_allocated_add(const struct sock *sk, int val) +{ + struct proto *proto = sk->sk_prot; + + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); +} + +static inline void +sk_memory_allocated_sub(const struct sock *sk, int val) +{ + struct proto *proto = sk->sk_prot; + + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); +} + +#endif /* _PROTO_MEMORY_H */ diff --git a/include/net/red.h b/include/net/red.h index 425364de0df7..802287d52c9e 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -233,10 +233,10 @@ static inline void red_set_parms(struct red_parms *p, int delta = qth_max - qth_min; u32 max_p_delta; - p->qth_min = qth_min << Wlog; - p->qth_max = qth_max << Wlog; - p->Wlog = Wlog; - p->Plog = Plog; + WRITE_ONCE(p->qth_min, qth_min << Wlog); + WRITE_ONCE(p->qth_max, qth_max << Wlog); + WRITE_ONCE(p->Wlog, Wlog); + WRITE_ONCE(p->Plog, Plog); if (delta <= 0) delta = 1; p->qth_delta = delta; @@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p, max_P = red_maxp(Plog); max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */ } - p->max_P = max_P; + WRITE_ONCE(p->max_P, max_P); max_p_delta = max_P / delta; max_p_delta = max(max_p_delta, 1U); p->max_P_reciprocal = reciprocal_value(max_p_delta); @@ -257,7 +257,7 @@ static inline void red_set_parms(struct red_parms *p, p->target_min = qth_min + 2*delta; p->target_max = qth_min + 3*delta; - p->Scell_log = Scell_log; + WRITE_ONCE(p->Scell_log, Scell_log); p->Scell_max = (255 << Scell_log); if (stab) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 8839133d6f6b..bdc737832da6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -18,6 +18,7 @@ #include <linux/refcount.h> #include <net/sock.h> +#include <net/rstreason.h> struct request_sock; struct sk_buff; @@ -34,7 +35,8 @@ struct request_sock_ops { void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, - struct sk_buff *skb); + struct sk_buff *skb, + enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); void (*syn_ack_timeout)(const struct request_sock *req); }; @@ -61,7 +63,11 @@ struct request_sock { struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ - u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */ + u8 syncookie:1; /* True if + * 1) tcpopts needs to be encoded in + * TS of SYN+ACK + * 2) ACK is validated by BPF kfunc. + */ u8 num_timeout:7; /* number of timeouts */ u32 ts_recent; struct timer_list rsk_timer; @@ -144,6 +150,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; + req->syncookie = 0; req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; diff --git a/include/net/route.h b/include/net/route.h index d4a0147942f1..93833cfe9c96 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -35,8 +35,6 @@ #include <linux/cache.h> #include <linux/security.h> -#define RTO_ONLINK 0x01 - static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) @@ -77,6 +75,17 @@ struct rtable { rt_pmtu:31; }; +#define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst) + +/** + * skb_rtable - Returns the skb &rtable + * @skb: buffer + */ +static inline struct rtable *skb_rtable(const struct sk_buff *skb) +{ + return dst_rtable(skb_dst(skb)); +} + static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_is_input != 0; @@ -141,15 +150,22 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 return ip_route_output_flow(net, flp, NULL); } +/* Simplistic IPv4 route lookup function. + * This is only suitable for some particular use cases: since the flowi4 + * structure is only partially set, it may bypass some fib-rules. + */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, - __be32 saddr, u8 tos, int oif) + __be32 saddr, u8 tos, int oif, + __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, .flowi4_tos = tos, + .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, }; + return ip_route_output_key(net, &fl4); } diff --git a/include/net/rps.h b/include/net/rps.h index 7660243e905b..a93401d23d66 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -122,4 +122,32 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } +static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + return ++sd->input_queue_tail; +#else + return 0; +#endif +} + +static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) +{ +#ifdef CONFIG_RPS + WRITE_ONCE(*dest, tail); +#endif +} + +static inline void rps_input_queue_head_add(struct softnet_data *sd, int val) +{ +#ifdef CONFIG_RPS + WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val); +#endif +} + +static inline void rps_input_queue_head_incr(struct softnet_data *sd) +{ + rps_input_queue_head_add(sd, 1); +} + #endif /* _NET_RPS_H */ diff --git a/include/net/rstreason.h b/include/net/rstreason.h new file mode 100644 index 000000000000..2575c85d7f7a --- /dev/null +++ b/include/net/rstreason.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_RSTREASON_H +#define _LINUX_RSTREASON_H +#include <net/dropreason-core.h> +#include <uapi/linux/mptcp.h> + +#define DEFINE_RST_REASON(FN, FNe) \ + FN(NOT_SPECIFIED) \ + FN(NO_SOCKET) \ + FN(TCP_INVALID_ACK_SEQUENCE) \ + FN(TCP_RFC7323_PAWS) \ + FN(TCP_TOO_OLD_ACK) \ + FN(TCP_ACK_UNSENT_DATA) \ + FN(TCP_FLAGS) \ + FN(TCP_OLD_ACK) \ + FN(TCP_ABORT_ON_DATA) \ + FN(TCP_TIMEWAIT_SOCKET) \ + FN(INVALID_SYN) \ + FN(MPTCP_RST_EUNSPEC) \ + FN(MPTCP_RST_EMPTCP) \ + FN(MPTCP_RST_ERESOURCE) \ + FN(MPTCP_RST_EPROHIBIT) \ + FN(MPTCP_RST_EWQ2BIG) \ + FN(MPTCP_RST_EBADPERF) \ + FN(MPTCP_RST_EMIDDLEBOX) \ + FN(ERROR) \ + FNe(MAX) + +/** + * enum sk_rst_reason - the reasons of socket reset + * + * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols. + * + * There are three parts in order: + * 1) skb drop reasons: relying on drop reasons for such as passive reset + * 2) independent reset reasons: such as active reset reasons + * 3) reset reasons in MPTCP: only for MPTCP use + */ +enum sk_rst_reason { + /* Refer to include/net/dropreason-core.h + * Rely on skb drop reasons because it indicates exactly why RST + * could happen. + */ + /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */ + SK_RST_REASON_NOT_SPECIFIED, + /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */ + SK_RST_REASON_NO_SOCKET, + /** + * @SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ + * field because ack sequence is not in the window between snd_una + * and snd_nxt + */ + SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE, + /** + * @SK_RST_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED + */ + SK_RST_REASON_TCP_RFC7323_PAWS, + /** @SK_RST_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ + SK_RST_REASON_TCP_TOO_OLD_ACK, + /** + * @SK_RST_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't + * sent yet + */ + SK_RST_REASON_TCP_ACK_UNSENT_DATA, + /** @SK_RST_REASON_TCP_FLAGS: TCP flags invalid */ + SK_RST_REASON_TCP_FLAGS, + /** @SK_RST_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ + SK_RST_REASON_TCP_OLD_ACK, + /** + * @SK_RST_REASON_TCP_ABORT_ON_DATA: abort on data + * corresponding to LINUX_MIB_TCPABORTONDATA + */ + SK_RST_REASON_TCP_ABORT_ON_DATA, + + /* Here start with the independent reasons */ + /** @SK_RST_REASON_TCP_TIMEWAIT_SOCKET: happen on the timewait socket */ + SK_RST_REASON_TCP_TIMEWAIT_SOCKET, + /** + * @SK_RST_REASON_INVALID_SYN: receive bad syn packet + * RFC 793 says if the state is not CLOSED/LISTEN/SYN-SENT then + * "fourth, check the SYN bit,...If the SYN is in the window it is + * an error, send a reset" + */ + SK_RST_REASON_INVALID_SYN, + + /* Copy from include/uapi/linux/mptcp.h. + * These reset fields will not be changed since they adhere to + * RFC 8684. So do not touch them. I'm going to list each definition + * of them respectively. + */ + /** + * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error. + * This is the default error; it implies that the subflow is no + * longer available. The presence of this option shows that the + * RST was generated by an MPTCP-aware device. + */ + SK_RST_REASON_MPTCP_RST_EUNSPEC, + /** + * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error. + * An error has been detected in the processing of MPTCP options. + * This is the usual reason code to return in the cases where a RST + * is being sent to close a subflow because of an invalid response. + */ + SK_RST_REASON_MPTCP_RST_EMPTCP, + /** + * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources. + * This code indicates that the sending host does not have enough + * resources to support the terminated subflow. + */ + SK_RST_REASON_MPTCP_RST_ERESOURCE, + /** + * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited. + * This code indicates that the requested subflow is prohibited by + * the policies of the sending host. + */ + SK_RST_REASON_MPTCP_RST_EPROHIBIT, + /** + * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data. + * This code indicates that there is an excessive amount of data + * that needs to be transmitted over the terminated subflow while + * having already been acknowledged over one or more other subflows. + * This may occur if a path has been unavailable for a short period + * and it is more efficient to reset and start again than it is to + * retransmit the queued data. + */ + SK_RST_REASON_MPTCP_RST_EWQ2BIG, + /** + * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance. + * This code indicates that the performance of this subflow was + * too low compared to the other subflows of this Multipath TCP + * connection. + */ + SK_RST_REASON_MPTCP_RST_EBADPERF, + /** + * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference. + * Middlebox interference has been detected over this subflow, + * making MPTCP signaling invalid. For example, this may be sent + * if the checksum does not validate. + */ + SK_RST_REASON_MPTCP_RST_EMIDDLEBOX, + + /** @SK_RST_REASON_ERROR: unexpected error happens */ + SK_RST_REASON_ERROR, + + /** + * @SK_RST_REASON_MAX: Maximum of socket reset reasons. + * It shouldn't be used as a real 'reason'. + */ + SK_RST_REASON_MAX, +}; + +/* Convert skb drop reasons to enum sk_rst_reason type */ +static inline enum sk_rst_reason +sk_rst_convert_drop_reason(enum skb_drop_reason reason) +{ + switch (reason) { + case SKB_DROP_REASON_NOT_SPECIFIED: + return SK_RST_REASON_NOT_SPECIFIED; + case SKB_DROP_REASON_NO_SOCKET: + return SK_RST_REASON_NO_SOCKET; + case SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: + return SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE; + case SKB_DROP_REASON_TCP_RFC7323_PAWS: + return SK_RST_REASON_TCP_RFC7323_PAWS; + case SKB_DROP_REASON_TCP_TOO_OLD_ACK: + return SK_RST_REASON_TCP_TOO_OLD_ACK; + case SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: + return SK_RST_REASON_TCP_ACK_UNSENT_DATA; + case SKB_DROP_REASON_TCP_FLAGS: + return SK_RST_REASON_TCP_FLAGS; + case SKB_DROP_REASON_TCP_OLD_ACK: + return SK_RST_REASON_TCP_OLD_ACK; + case SKB_DROP_REASON_TCP_ABORT_ON_DATA: + return SK_RST_REASON_TCP_ABORT_ON_DATA; + default: + /* If we don't have our own corresponding reason */ + return SK_RST_REASON_NOT_SPECIFIED; + } +} +#endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cefe0c4bdae3..79edd5b5e3c9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -117,6 +117,7 @@ struct Qdisc { struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; + int owner; unsigned long state; unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; @@ -127,6 +128,7 @@ struct Qdisc { struct rcu_head rcu; netdevice_tracker dev_tracker; + struct lock_class_key root_lock_key; /* private data */ long privdata[] ____cacheline_aligned; }; @@ -422,6 +424,7 @@ struct tcf_proto { */ spinlock_t lock; bool deleting; + bool counted; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; @@ -471,6 +474,9 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; + bool bypass_wanted; + atomic_t filtercnt; /* Number of filters */ + atomic_t skipswcnt; /* Number of skip_sw filters */ atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ diff --git a/include/net/scm.h b/include/net/scm.h index 92276a2c5543..0d35c7c77a74 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -23,10 +23,20 @@ struct scm_creds { kgid_t gid; }; +#ifdef CONFIG_UNIX +struct unix_edge; +#endif + struct scm_fp_list { short count; short count_unix; short max; +#ifdef CONFIG_UNIX + bool inflight; + bool dead; + struct list_head vertices; + struct unix_edge *edges; +#endif struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/include/net/smc.h b/include/net/smc.h index c9dcb30e3fd9..db84e4e35080 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -26,9 +26,6 @@ struct smc_hashinfo { struct hlist_head ht; }; -int smc_hash_sk(struct sock *sk); -void smc_unhash_sk(struct sock *sk); - /* SMCD/ISM device driver interface */ struct smcd_dmb { u64 dmb_tok; @@ -50,7 +47,6 @@ struct smcd_dmb { #define ISM_ERROR 0xFFFF struct smcd_dev; -struct ism_client; struct smcd_gid { u64 gid; @@ -61,14 +57,8 @@ struct smcd_ops { int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid, u32 vid_valid, u32 vid); int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, - struct ism_client *client); + void *client); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); - int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); - int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); - int (*set_vlan_required)(struct smcd_dev *dev); - int (*reset_vlan_required)(struct smcd_dev *dev); - int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, - u32 trigger_irq, u32 event_code, u64 info); int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); @@ -76,11 +66,23 @@ struct smcd_ops { void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); u16 (*get_chid)(struct smcd_dev *dev); struct device* (*get_dev)(struct smcd_dev *dev); + + /* optional operations */ + int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); + int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); + int (*set_vlan_required)(struct smcd_dev *dev); + int (*reset_vlan_required)(struct smcd_dev *dev); + int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, + u32 trigger_irq, u32 event_code, u64 info); + int (*support_dmb_nocopy)(struct smcd_dev *dev); + int (*attach_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*detach_dmb)(struct smcd_dev *dev, u64 token); }; struct smcd_dev { const struct smcd_ops *ops; void *priv; + void *client; struct list_head list; spinlock_t lock; struct smc_connection **conn; diff --git a/include/net/sock.h b/include/net/sock.h index b5e00702acc1..0450494a1766 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1371,73 +1371,6 @@ static inline int sk_under_cgroup_hierarchy(struct sock *sk, #endif } -static inline bool sk_has_memory_pressure(const struct sock *sk) -{ - return sk->sk_prot->memory_pressure != NULL; -} - -static inline bool sk_under_global_memory_pressure(const struct sock *sk) -{ - return sk->sk_prot->memory_pressure && - !!READ_ONCE(*sk->sk_prot->memory_pressure); -} - -static inline bool sk_under_memory_pressure(const struct sock *sk) -{ - if (!sk->sk_prot->memory_pressure) - return false; - - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) - return true; - - return !!READ_ONCE(*sk->sk_prot->memory_pressure); -} - -static inline long -proto_memory_allocated(const struct proto *prot) -{ - return max(0L, atomic_long_read(prot->memory_allocated)); -} - -static inline long -sk_memory_allocated(const struct sock *sk) -{ - return proto_memory_allocated(sk->sk_prot); -} - -/* 1 MB per cpu, in page units */ -#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) -extern int sysctl_mem_pcpu_rsv; - -static inline void -sk_memory_allocated_add(struct sock *sk, int amt) -{ - int local_reserve; - - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); -} - -static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) -{ - int local_reserve; - - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); -} - #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 static inline void sk_sockets_allocated_dec(struct sock *sk) @@ -1464,15 +1397,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot) return percpu_counter_sum_positive(prot->sockets_allocated); } -static inline bool -proto_memory_pressure(struct proto *prot) -{ - if (!prot->memory_pressure) - return false; - return !!READ_ONCE(*prot->memory_pressure); -} - - #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { @@ -1759,6 +1683,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); @@ -2506,6 +2437,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band) } } +static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band) +{ + if (unlikely(sock_flag(sk, SOCK_FASYNC))) + sock_wake_async(rcu_dereference(sk->sk_wq), how, band); +} + /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might * need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak. * Note: for send buffers, TCP works better if we can build two skbs at @@ -2822,12 +2759,10 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { skb = sk->sk_validate_xmit_skb(sk, dev, skb); -#ifdef CONFIG_TLS_DEVICE - } else if (unlikely(skb->decrypted)) { + } else if (unlikely(skb_is_decrypted(skb))) { pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); kfree_skb(skb); skb = NULL; -#endif } #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ae35199d3b3..060e95b331a2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo; DECLARE_PER_CPU(unsigned int, tcp_orphan_count); int tcp_orphan_count_sum(void); +DECLARE_PER_CPU(u32, tcp_tw_isn); + void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -294,14 +296,6 @@ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline bool tcp_out_of_memory(struct sock *sk) -{ - if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2)) - return true; - return false; -} - static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sk_wmem_queued_add(sk, -skb->truesize); @@ -314,7 +308,7 @@ static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) void sk_forced_mem_schedule(struct sock *sk, int size); -bool tcp_check_oom(struct sock *sk, int shift); +bool tcp_check_oom(const struct sock *sk, int shift); extern struct proto tcp_prot; @@ -353,7 +347,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -void tcp_twsk_purge(struct list_head *net_exit_list, int family); +void tcp_twsk_purge(struct list_head *net_exit_list); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -392,7 +386,8 @@ enum tcp_tw_status { enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - const struct tcphdr *th); + const struct tcphdr *th, + u32 *tw_isn); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); @@ -667,7 +662,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, void tcp_send_probe0(struct sock *); int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); -void tcp_send_active_reset(struct sock *sk, gfp_t priority); +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); @@ -742,7 +738,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); -static inline void tcp_bound_rto(const struct sock *sk) +static inline void tcp_bound_rto(struct sock *sk) { if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) inet_csk(sk)->icsk_rto = TCP_RTO_MAX; @@ -925,6 +921,19 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) #define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) +/* State flags for sacked in struct tcp_skb_cb */ +enum tcp_skb_cb_sacked_flags { + TCPCB_SACKED_ACKED = (1 << 0), /* SKB ACK'd by a SACK block */ + TCPCB_SACKED_RETRANS = (1 << 1), /* SKB retransmitted */ + TCPCB_LOST = (1 << 2), /* SKB is lost */ + TCPCB_TAGBITS = (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS | + TCPCB_LOST), /* All tag bits */ + TCPCB_REPAIRED = (1 << 4), /* SKB repaired (no skb_mstamp_ns) */ + TCPCB_EVER_RETRANS = (1 << 7), /* Ever retransmitted frame */ + TCPCB_RETRANS = (TCPCB_SACKED_RETRANS | TCPCB_EVER_RETRANS | + TCPCB_REPAIRED), +}; + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. @@ -935,13 +944,10 @@ struct tcp_skb_cb { __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ union { - /* Note : tcp_tw_isn is used in input path only - * (isn chosen by tcp_timewait_state_process()) - * + /* Note : * tcp_gso_segs/size are used in write queue only, * cf tcp_skb_pcount()/tcp_skb_mss() */ - __u32 tcp_tw_isn; struct { u16 tcp_gso_segs; u16 tcp_gso_size; @@ -950,15 +956,6 @@ struct tcp_skb_cb { __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ __u8 sacked; /* State flags for SACK. */ -#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ -#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ -#define TCPCB_LOST 0x04 /* SKB is lost */ -#define TCPCB_TAGBITS 0x07 /* All tag bits */ -#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */ -#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ -#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ - TCPCB_REPAIRED) - __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ eor:1, /* Is skb MSG_EOR marked? */ @@ -1167,7 +1164,7 @@ struct tcp_congestion_ops { /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) */ - void (*cong_control)(struct sock *sk, const struct rate_sample *rs); + void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); /* new value of cwnd after loss (required) */ @@ -1539,11 +1536,10 @@ static inline int tcp_space_from_win(const struct sock *sk, int win) return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win); } -/* Assume a conservative default of 1200 bytes of payload per 4K page. +/* Assume a 50% default for skb->len/skb->truesize ratio. * This may be adjusted later in tcp_measure_rcv_mss(). */ -#define TCP_DEFAULT_SCALING_RATIO ((1200 << TCP_RMEM_TO_WIN_SCALE) / \ - SKB_TRUESIZE(4096)) +#define TCP_DEFAULT_SCALING_RATIO (1 << (TCP_RMEM_TO_WIN_SCALE - 1)) static inline void tcp_scaling_ratio_init(struct sock *sk) { @@ -2195,7 +2191,10 @@ void tcp_v4_destroy_sock(struct sock *sk); struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features); -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb); +struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb); +struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th); +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); @@ -2284,7 +2283,8 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct request_sock *req); + struct request_sock *req, + u32 tw_isn); u32 (*init_seq)(const struct sk_buff *skb); u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, @@ -2706,10 +2706,10 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } -static inline void tcp_bpf_rtt(struct sock *sk) +static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt) { if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) - tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt); } #if IS_ENABLED(CONFIG_SMC) diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 74d2b463cc95..62b3e9f2aed4 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -15,18 +15,9 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; char *twsk_slab_name; unsigned int twsk_obj_size; - int (*twsk_unique)(struct sock *sk, - struct sock *sktw, void *twp); void (*twsk_destructor)(struct sock *sk); }; -static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) -{ - if (sk->sk_prot->twsk_prot->twsk_unique != NULL) - return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp); - return 0; -} - static inline void twsk_destructor(struct sock *sk) { if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) diff --git a/include/net/tls.h b/include/net/tls.h index 340ad43971e4..3a33924db2bc 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -111,7 +111,8 @@ struct tls_strparser { u32 stopped : 1; u32 copy_mode : 1; u32 mixed_decrypted : 1; - u32 msg_ready : 1; + + bool msg_ready; struct strp_msg stm; @@ -361,7 +362,7 @@ static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb) static inline struct tls_context *tls_get_ctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code, * TLS data path doesn't need rcu_dereference(). diff --git a/include/net/udp.h b/include/net/udp.h index 488a6d2babcc..c4e05b14b648 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n; - - n = copy_to_iter(skb->data + off, len, to); - if (n == len) - return 0; - - iov_iter_revert(to, n); - return -EFAULT; + return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT; } /* diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d716214fe03d..a93dc51f6323 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -179,8 +179,8 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, - int md_size); + const unsigned long *flags, + __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3cb4dc9bd70e..3d54de168a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, { if (!compl) return; + if (!compl->tx_timestamp) + return; *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 57c743b7e4fe..77ebf5bcf0b9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -291,6 +291,7 @@ struct xfrm_state { /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; + u8 dir; }; static inline struct net *xs_net(struct xfrm_state *x) @@ -1049,6 +1050,9 @@ struct xfrm_offload { #define CRYPTO_INVALID_PACKET_SYNTAX 64 #define CRYPTO_INVALID_PROTOCOL 128 + /* Used to keep whole l2 header for transport mode GRO */ + __u32 orig_mac_len; + __u8 proto; __u8 inner_ipproto; }; |