diff options
Diffstat (limited to 'include/net')
102 files changed, 3472 insertions, 1155 deletions
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index da3a77d25fcb..5ab4c9901ccc 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -58,6 +58,9 @@ #include <net/ipv6.h> #include <net/net_namespace.h> +/* special link-layer handling */ +#include <net/mac802154.h> + #define EUI64_ADDR_LEN 8 #define LOWPAN_NHC_MAX_ID_LEN 1 @@ -93,7 +96,7 @@ static inline bool lowpan_is_iphc(u8 dispatch) } #define LOWPAN_PRIV_SIZE(llpriv_size) \ - (sizeof(struct lowpan_priv) + llpriv_size) + (sizeof(struct lowpan_dev) + llpriv_size) enum lowpan_lltypes { LOWPAN_LLTYPE_BTLE, @@ -129,7 +132,7 @@ lowpan_iphc_ctx_is_compression(const struct lowpan_iphc_ctx *ctx) return test_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags); } -struct lowpan_priv { +struct lowpan_dev { enum lowpan_lltypes lltype; struct dentry *iface_debugfs; struct lowpan_iphc_ctx_table ctx; @@ -138,12 +141,34 @@ struct lowpan_priv { u8 priv[0] __aligned(sizeof(void *)); }; +struct lowpan_802154_neigh { + __le16 short_addr; +}; + static inline -struct lowpan_priv *lowpan_priv(const struct net_device *dev) +struct lowpan_802154_neigh *lowpan_802154_neigh(void *neigh_priv) +{ + return neigh_priv; +} + +static inline +struct lowpan_dev *lowpan_dev(const struct net_device *dev) { return netdev_priv(dev); } +/* private device info */ +struct lowpan_802154_dev { + struct net_device *wdev; /* wpan device ptr */ + u16 fragment_tag; +}; + +static inline struct +lowpan_802154_dev *lowpan_802154_dev(const struct net_device *dev) +{ + return (struct lowpan_802154_dev *)lowpan_dev(dev)->priv; +} + struct lowpan_802154_cb { u16 d_tag; unsigned int d_size; @@ -157,6 +182,22 @@ struct lowpan_802154_cb *lowpan_802154_cb(const struct sk_buff *skb) return (struct lowpan_802154_cb *)skb->cb; } +static inline void lowpan_iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, @@ -213,6 +254,12 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data, return false; } +static inline bool lowpan_802154_is_valid_src_short_addr(__le16 addr) +{ + /* First bit of addr is multicast, reserved or 802.15.4 specific */ + return !(addr & cpu_to_le16(0x8000)); +} + static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, const size_t len) { diff --git a/include/net/act_api.h b/include/net/act_api.h index 2a19fe111c78..41e6a24a44b9 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -2,42 +2,14 @@ #define __NET_ACT_API_H /* - * Public police action API for classifiers/qdiscs - */ + * Public action API for classifiers/qdiscs +*/ #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> #include <net/netns/generic.h> -struct tcf_common { - struct hlist_node tcfc_head; - u32 tcfc_index; - int tcfc_refcnt; - int tcfc_bindcnt; - u32 tcfc_capab; - int tcfc_action; - struct tcf_t tcfc_tm; - struct gnet_stats_basic_packed tcfc_bstats; - struct gnet_stats_queue tcfc_qstats; - struct gnet_stats_rate_est64 tcfc_rate_est; - spinlock_t tcfc_lock; - struct rcu_head tcfc_rcu; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; - struct gnet_stats_queue __percpu *cpu_qstats; -}; -#define tcf_head common.tcfc_head -#define tcf_index common.tcfc_index -#define tcf_refcnt common.tcfc_refcnt -#define tcf_bindcnt common.tcfc_bindcnt -#define tcf_capab common.tcfc_capab -#define tcf_action common.tcfc_action -#define tcf_tm common.tcfc_tm -#define tcf_bstats common.tcfc_bstats -#define tcf_qstats common.tcfc_qstats -#define tcf_rate_est common.tcfc_rate_est -#define tcf_lock common.tcfc_lock -#define tcf_rcu common.tcfc_rcu struct tcf_hashinfo { struct hlist_head *htab; @@ -46,6 +18,44 @@ struct tcf_hashinfo { u32 index; }; +struct tc_action_ops; + +struct tc_action { + const struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct list_head list; + struct tcf_hashinfo *hinfo; + + struct hlist_node tcfa_head; + u32 tcfa_index; + int tcfa_refcnt; + int tcfa_bindcnt; + u32 tcfa_capab; + int tcfa_action; + struct tcf_t tcfa_tm; + struct gnet_stats_basic_packed tcfa_bstats; + struct gnet_stats_queue tcfa_qstats; + struct gnet_stats_rate_est64 tcfa_rate_est; + spinlock_t tcfa_lock; + struct rcu_head tcfa_rcu; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; +}; +#define tcf_act common.tcfa_act +#define tcf_head common.tcfa_head +#define tcf_index common.tcfa_index +#define tcf_refcnt common.tcfa_refcnt +#define tcf_bindcnt common.tcfa_bindcnt +#define tcf_capab common.tcfa_capab +#define tcf_action common.tcfa_action +#define tcf_tm common.tcfa_tm +#define tcf_bstats common.tcfa_bstats +#define tcf_qstats common.tcfa_qstats +#define tcf_rate_est common.tcfa_rate_est +#define tcf_lock common.tcfa_lock +#define tcf_rcu common.tcfa_rcu + static inline unsigned int tcf_hash(u32 index, unsigned int hmask) { return index & hmask; @@ -76,16 +86,17 @@ static inline void tcf_lastuse_update(struct tcf_t *tm) if (tm->lastuse != now) tm->lastuse = now; + if (unlikely(!tm->firstuse)) + tm->firstuse = now; } -struct tc_action { - void *priv; - const struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct list_head list; - struct tcf_hashinfo *hinfo; -}; +static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) +{ + dtm->install = jiffies_to_clock_t(jiffies - stm->install); + dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse); + dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse); + dtm->expires = jiffies_to_clock_t(stm->expires); +} #ifdef CONFIG_NET_CLS_ACT @@ -96,16 +107,19 @@ struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ + size_t size; struct module *owner; - int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); + int (*act)(struct sk_buff *, const struct tc_action *, + struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct net *, struct tc_action *, u32); + int (*lookup)(struct net *, struct tc_action **, u32); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *act, int ovr, + struct nlattr *est, struct tc_action **act, int ovr, int bind); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, struct tc_action *); + struct netlink_callback *, int, const struct tc_action_ops *); + void (*stats_update)(struct tc_action *, u64, u32, u64); }; struct tc_action_net { @@ -114,8 +128,8 @@ struct tc_action_net { }; static inline -int tc_action_net_init(struct tc_action_net *tn, const struct tc_action_ops *ops, - unsigned int mask) +int tc_action_net_init(struct tc_action_net *tn, + const struct tc_action_ops *ops, unsigned int mask) { int err = 0; @@ -135,17 +149,19 @@ void tcf_hashinfo_destroy(const struct tc_action_ops *ops, static inline void tc_action_net_exit(struct tc_action_net *tn) { tcf_hashinfo_destroy(tn->ops, tn->hinfo); + kfree(tn->hinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - struct tc_action *a); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index); + const struct tc_action_ops *ops); +int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); u32 tcf_hash_new_index(struct tc_action_net *tn); -int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, - int bind); +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, + int bind); int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action *a, int size, int bind, bool cpustats); + struct tc_action **a, const struct tc_action_ops *ops, int bind, + bool cpustats); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); @@ -157,7 +173,8 @@ static inline int tcf_hash_release(struct tc_action *a, bool bind) } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); +int tcf_unregister_action(struct tc_action_ops *a, + struct pernet_operations *ops); int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res); @@ -177,10 +194,25 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); #define tc_for_each_action(_a, _exts) \ list_for_each_entry(a, &(_exts)->actions, list) + +#define tc_single_action(_exts) \ + (list_is_singular(&(_exts)->actions)) + +static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, + u64 packets, u64 lastuse) +{ + if (!a->ops->stats_update) + return; + + a->ops->stats_update(a, bytes, packets, lastuse); +} + #else /* CONFIG_NET_CLS_ACT */ #define tc_no_actions(_exts) true -#define tc_for_each_action(_a, _exts) while (0) +#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) +#define tc_single_action(_exts) false +#define tcf_action_stats_update(a, bytes, packets, lastuse) #endif /* CONFIG_NET_CLS_ACT */ #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 730d856683e5..9826d3a9464c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,6 +94,16 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags); + +int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + const struct in6_addr *addr, int addr_type, + u32 addr_flags, bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft); + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index e797d45a5ae6..ac1bc3c49fbd 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -12,6 +12,7 @@ #ifndef _NET_RXRPC_H #define _NET_RXRPC_H +#include <linux/skbuff.h> #include <linux/rxrpc.h> struct rxrpc_call; @@ -19,11 +20,12 @@ struct rxrpc_call; /* * the mark applied to socket buffers that may be intercepted */ -enum { +enum rxrpc_skb_mark { RXRPC_SKB_MARK_DATA, /* data message */ RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ RXRPC_SKB_MARK_BUSY, /* server busy message */ RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ RXRPC_SKB_MARK_NET_ERROR, /* network error message */ RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ RXRPC_SKB_MARK_NEW_CALL, /* local error message */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e9eb2d6791b3..f2758964ce6f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -63,6 +63,8 @@ struct vsock_sock { struct list_head accept_queue; bool rejected; struct delayed_work dwork; + struct delayed_work close_work; + bool close_work_scheduled; u32 peer_shutdown; bool sent_request; bool ignore_connecting_rst; @@ -165,6 +167,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) } void vsock_core_exit(void); +/* The transport may downcast this to access transport-specific functions */ +const struct vsock_transport *vsock_core_get_transport(void); + /**** UTILS ****/ void vsock_release_pending(struct sock *pending); @@ -177,6 +182,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); +void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5d38d980b89d..003b25283407 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -61,9 +61,11 @@ #define HCI_RS232 4 #define HCI_PCI 5 #define HCI_SDIO 6 +#define HCI_SPI 7 +#define HCI_I2C 8 /* HCI controller types */ -#define HCI_BREDR 0x00 +#define HCI_PRIMARY 0x00 #define HCI_AMP 0x01 /* First BR/EDR Controller shall have ID = 0 */ @@ -443,6 +445,7 @@ enum { /* ---- HCI Error Codes ---- */ #define HCI_ERROR_UNKNOWN_CONN_ID 0x02 #define HCI_ERROR_AUTH_FAILURE 0x05 +#define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dc71473462ac..ee7fc47680a1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -372,6 +372,8 @@ struct hci_dev { atomic_t promisc; + const char *hw_info; + const char *fw_info; struct dentry *debugfs; struct device dev; @@ -654,6 +656,7 @@ enum { HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, + HCI_CONN_AUTH_FAILURE, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) @@ -1021,6 +1024,10 @@ void hci_unregister_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); +int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); +int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); +void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); @@ -1097,9 +1104,6 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); -int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); - void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index ea73e0826aa7..7647964b1efa 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -645,6 +645,7 @@ struct mgmt_ev_device_connected { #define MGMT_DEV_DISCONN_TIMEOUT 0x01 #define MGMT_DEV_DISCONN_LOCAL_HOST 0x02 #define MGMT_DEV_DISCONN_REMOTE 0x03 +#define MGMT_DEV_DISCONN_AUTH_FAILURE 0x04 #define MGMT_EV_DEVICE_DISCONNECTED 0x000C struct mgmt_ev_device_disconnected { diff --git a/include/net/bonding.h b/include/net/bonding.h index 791800ddd6d9..6360c259da6d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -34,6 +34,9 @@ #define BOND_DEFAULT_MIIMON 100 +#ifndef __long_aligned +#define __long_aligned __attribute__((aligned((sizeof(long))))) +#endif /* * Less bad way to call ioctl from within the kernel; this needs to be * done some other way to get the call out of interrupt context. @@ -138,7 +141,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; - u8 ad_actor_system[ETH_ALEN]; + + /* 2 bytes of padding : see ether_addr_equal_64bits() */ + u8 ad_actor_system[ETH_ALEN + 2]; }; struct bond_parm_tbl { diff --git a/include/net/calipso.h b/include/net/calipso.h new file mode 100644 index 000000000000..b1b30cd36601 --- /dev/null +++ b/include/net/calipso.h @@ -0,0 +1,91 @@ +/* + * CALIPSO - Common Architecture Label IPv6 Security Option + * + * This is an implementation of the CALIPSO protocol as specified in + * RFC 5570. + * + * Authors: Paul Moore <paul@paul-moore.com> + * Huw Davies <huw@codeweavers.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef _CALIPSO_H +#define _CALIPSO_H + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <net/netlabel.h> +#include <net/request_sock.h> +#include <linux/atomic.h> +#include <asm/unaligned.h> + +/* known doi values */ +#define CALIPSO_DOI_UNKNOWN 0x00000000 + +/* doi mapping types */ +#define CALIPSO_MAP_UNKNOWN 0 +#define CALIPSO_MAP_PASS 2 + +/* + * CALIPSO DOI definitions + */ + +/* DOI definition struct */ +struct calipso_doi { + u32 doi; + u32 type; + + atomic_t refcount; + struct list_head list; + struct rcu_head rcu; +}; + +/* + * Sysctl Variables + */ +extern int calipso_cache_enabled; +extern int calipso_cache_bucketsize; + +#ifdef CONFIG_NETLABEL +int __init calipso_init(void); +void calipso_exit(void); +bool calipso_validate(const struct sk_buff *skb, const unsigned char *option); +#else +static inline int __init calipso_init(void) +{ + return 0; +} + +static inline void calipso_exit(void) +{ +} +static inline bool calipso_validate(const struct sk_buff *skb, + const unsigned char *option) +{ + return true; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _CALIPSO_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9e1b24c29f0c..9c23f4d33e06 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -68,26 +68,6 @@ struct wiphy; */ /** - * enum ieee80211_band - supported frequency bands - * - * The bands are assigned this way because the supported - * bitrates differ in these bands. - * - * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band - * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) - * @IEEE80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) - * @IEEE80211_NUM_BANDS: number of defined bands - */ -enum ieee80211_band { - IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, - IEEE80211_BAND_60GHZ = NL80211_BAND_60GHZ, - - /* keep last */ - IEEE80211_NUM_BANDS -}; - -/** * enum ieee80211_channel_flags - channel flags * * Channel flags set by the regulatory control code. @@ -167,7 +147,7 @@ enum ieee80211_channel_flags { * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. */ struct ieee80211_channel { - enum ieee80211_band band; + enum nl80211_band band; u16 center_freq; u16 hw_value; u32 flags; @@ -324,7 +304,7 @@ struct ieee80211_sta_vht_cap { struct ieee80211_supported_band { struct ieee80211_channel *channels; struct ieee80211_rate *bitrates; - enum ieee80211_band band; + enum nl80211_band band; int n_channels; int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; @@ -350,6 +330,9 @@ struct ieee80211_supported_band { * in a separate chapter. */ +#define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\ + WLAN_USER_POSITION_LEN) + /** * struct vif_params - describes virtual interface parameters * @use_4addr: use 4-address frames @@ -359,10 +342,13 @@ struct ieee80211_supported_band { * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. + * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only + * packets belonging to that MU-MIMO groupID. */ struct vif_params { - int use_4addr; - u8 macaddr[ETH_ALEN]; + int use_4addr; + u8 macaddr[ETH_ALEN]; + u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN]; }; /** @@ -794,6 +780,7 @@ enum station_parameters_apply_mask { * (bitmask of BIT(NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change + * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station @@ -816,6 +803,7 @@ enum station_parameters_apply_mask { * @supported_oper_classes_len: number of supported operating classes * @opmode_notif: operating mode field from Operating Mode Notification * @opmode_notif_used: information if operating mode field is used + * @support_p2p_ps: information if station supports P2P PS mechanism */ struct station_parameters { const u8 *supported_rates; @@ -824,6 +812,7 @@ struct station_parameters { u32 sta_modify_mask; int listen_interval; u16 aid; + u16 peer_aid; u8 supported_rates_len; u8 plink_action; u8 plink_state; @@ -841,6 +830,7 @@ struct station_parameters { u8 supported_oper_classes_len; u8 opmode_notif; bool opmode_notif_used; + int support_p2p_ps; }; /** @@ -1063,11 +1053,12 @@ struct cfg80211_tid_stats { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. */ struct station_info { - u32 filled; + u64 filled; u32 connected_time; u32 inactive_time; u64 rx_bytes; @@ -1106,6 +1097,7 @@ struct station_info { u32 expected_throughput; u64 rx_beacon; + u64 rx_duration; u8 rx_beacon_signal_avg; struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; }; @@ -1368,7 +1360,7 @@ struct mesh_setup { bool user_mpm; u8 dtim_period; u16 beacon_interval; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; }; @@ -1434,6 +1426,21 @@ struct cfg80211_ssid { }; /** + * struct cfg80211_scan_info - information about completed scan + * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the + * wireless device that requested the scan is connected to. If this + * information is not available, this field is left zero. + * @tsf_bssid: the BSSID according to which %scan_start_tsf is set. + * @aborted: set to true if the scan was aborted for any reason, + * userspace will be notified of that + */ +struct cfg80211_scan_info { + u64 scan_start_tsf; + u8 tsf_bssid[ETH_ALEN] __aligned(2); + bool aborted; +}; + +/** * struct cfg80211_scan_request - scan request description * * @ssids: SSIDs to scan for (active scan only) @@ -1443,18 +1450,24 @@ struct cfg80211_ssid { * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @duration: how long to listen on each channel, in TUs. If + * %duration_mandatory is not set, this is the maximum dwell time and + * the actual dwell time may be shorter. + * @duration_mandatory: if set, the scan duration must be as specified by the + * %duration field. * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for - * @aborted: (internal) scan request was notified as aborted + * @info: (internal) information about completed scan * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr + * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; @@ -1463,19 +1476,23 @@ struct cfg80211_scan_request { enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; + u16 duration; + bool duration_mandatory; u32 flags; - u32 rates[IEEE80211_NUM_BANDS]; + u32 rates[NUM_NL80211_BANDS]; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); + u8 bssid[ETH_ALEN] __aligned(2); /* internal */ struct wiphy *wiphy; unsigned long scan_start; - bool aborted, notified; + struct cfg80211_scan_info info; + bool notified; bool no_cck; /* keep last */ @@ -1608,16 +1625,23 @@ enum cfg80211_signal_type { * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of * ktime_get_boot_ns() is likely appropriate. + * @parent_tsf: the time at the start of reception of the first octet of the + * timestamp field of the frame. The time is the TSF of the BSS specified + * by %parent_bssid. + * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to + * the BSS that requested the scan in which the beacon/probe was received. */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; + u64 parent_tsf; + u8 parent_bssid[ETH_ALEN] __aligned(2); }; /** - * struct cfg80211_bss_ie_data - BSS entry IE data + * struct cfg80211_bss_ies - BSS entry IE data * @tsf: TSF contained in the frame that carried these IEs * @rcu_head: internal use, for freeing * @len: length of the IEs @@ -1746,7 +1770,12 @@ enum cfg80211_assoc_req_flags { * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings - * @prev_bssid: previous BSSID, if not %NULL use reassociate frame + * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used + * to indicate a request to reassociate within the ESS instead of a request + * do the initial association with the ESS. When included, this is set to + * the BSSID of the current association, i.e., to the value that is + * included in the Current AP address field of the Reassociation Request + * frame. * @flags: See &enum cfg80211_assoc_req_flags * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. @@ -1851,12 +1880,39 @@ struct cfg80211_ibss_params { bool privacy; bool control_port; bool userspace_handles_dfs; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; }; /** + * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. + * + * @band: band of BSS which should match for RSSI level adjustment. + * @delta: value of RSSI level adjustment. + */ +struct cfg80211_bss_select_adjust { + enum nl80211_band band; + s8 delta; +}; + +/** + * struct cfg80211_bss_selection - connection parameters for BSS selection. + * + * @behaviour: requested BSS selection behaviour. + * @param: parameters for requestion behaviour. + * @band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF. + * @adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST. + */ +struct cfg80211_bss_selection { + enum nl80211_bss_select_attr behaviour; + union { + enum nl80211_band band_pref; + struct cfg80211_bss_select_adjust adjust; + } param; +}; + +/** * struct cfg80211_connect_params - Connection parameters * * This structure provides information needed to complete IEEE 802.11 @@ -1893,6 +1949,13 @@ struct cfg80211_ibss_params { * @vht_capa_mask: The bits of vht_capa which are to be used. * @pbss: if set, connect to a PCP instead of AP. Valid for DMG * networks. + * @bss_select: criteria to be used for BSS selection. + * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used + * to indicate a request to reassociate within the ESS instead of a request + * do the initial association with the ESS. When included, this is set to + * the BSSID of the current association, i.e., to the value that is + * included in the Current AP address field of the Reassociation Request + * frame. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -1916,6 +1979,8 @@ struct cfg80211_connect_params { struct ieee80211_vht_cap vht_capa; struct ieee80211_vht_cap vht_capa_mask; bool pbss; + struct cfg80211_bss_selection bss_select; + const u8 *prev_bssid; }; /** @@ -1945,7 +2010,7 @@ struct cfg80211_bitrate_mask { u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; enum nl80211_txrate_gi gi; - } control[IEEE80211_NUM_BANDS]; + } control[NUM_NL80211_BANDS]; }; /** * struct cfg80211_pmksa - PMK Security Association @@ -2340,9 +2405,23 @@ struct cfg80211_qos_map { * (invoked with the wireless_dev mutex held) * * @connect: Connect to the ESS with the specified parameters. When connected, - * call cfg80211_connect_result() with status code %WLAN_STATUS_SUCCESS. - * If the connection fails for some reason, call cfg80211_connect_result() - * with the status from the AP. + * call cfg80211_connect_result()/cfg80211_connect_bss() with status code + * %WLAN_STATUS_SUCCESS. If the connection fails for some reason, call + * cfg80211_connect_result()/cfg80211_connect_bss() with the status code + * from the AP or cfg80211_connect_timeout() if no frame with status code + * was received. + * The driver is allowed to roam to other BSSes within the ESS when the + * other BSS matches the connect parameters. When such roaming is initiated + * by the driver, the driver is expected to verify that the target matches + * the configured security parameters and to use Reassociation Request + * frame instead of Association Request frame. + * The connect function can also be used to request the driver to perform a + * specific roam when connected to an ESS. In that case, the prev_bssid + * parameter is set to the BSSID of the currently associated BSS as an + * indication of requesting reassociation. + * In both the driver-initiated and new connect() call initiated roaming + * cases, the result of roaming is indicated with a call to + * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS. * (invoked with the wireless_dev mutex held) @@ -2622,7 +2701,7 @@ struct cfg80211_ops { int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, - int rate[IEEE80211_NUM_BANDS]); + int rate[NUM_NL80211_BANDS]); int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); @@ -3043,6 +3122,24 @@ struct wiphy_vendor_command { }; /** + * struct wiphy_iftype_ext_capab - extended capabilities per interface type + * @iftype: interface type + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are the + * 802.11 extended capabilities ("Extended Capabilities element") and are + * in the same format as in the information element. See IEEE Std + * 802.11-2012 8.4.2.29 for the defined fields. + * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities + */ +struct wiphy_iftype_ext_capab { + enum nl80211_iftype iftype; + const u8 *extended_capabilities; + const u8 *extended_capabilities_mask; + u8 extended_capabilities_len; +}; + +/** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() @@ -3152,6 +3249,9 @@ struct wiphy_vendor_command { * @vht_capa_mod_mask: Specify what VHT capabilities can be over-ridden. * If null, then none can be over-ridden. * + * @wdev_list: the list of associated (virtual) interfaces; this list must + * not be modified by the driver, but can be read with RTNL/RCU protection. + * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. * @@ -3159,9 +3259,14 @@ struct wiphy_vendor_command { * additional capabilities might be supported by userspace; these are * the 802.11 extended capabilities ("Extended Capabilities element") * and are in the same format as in the information element. See - * 802.11-2012 8.4.2.29 for the defined fields. + * 802.11-2012 8.4.2.29 for the defined fields. These are the default + * extended capabilities to be used if the capabilities are not specified + * for a specific interface type in iftype_ext_capab. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities + * @iftype_ext_capab: array of extended capabilities per interface type + * @num_iftype_ext_capab: number of interface types for which extended + * capabilities are specified separately. * @coalesce: packet coalescing support information * * @vendor_commands: array of vendor commands supported by the hardware @@ -3184,6 +3289,9 @@ struct wiphy_vendor_command { * low rssi when a frame is heard on different channel, then it should set * this variable to the maximal offset for which it can compensate. * This value should be set in MHz. + * @bss_select_support: bitmask indicating the BSS selection criteria supported + * by the driver in the .connect() callback. The bit position maps to the + * attribute indices defined in &enum nl80211_bss_select_attr. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3258,6 +3366,9 @@ struct wiphy { const u8 *extended_capabilities, *extended_capabilities_mask; u8 extended_capabilities_len; + const struct wiphy_iftype_ext_capab *iftype_ext_capab; + unsigned int num_iftype_ext_capab; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered @@ -3265,7 +3376,7 @@ struct wiphy { * help determine whether you own this wiphy or not. */ const void *privid; - struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + struct ieee80211_supported_band *bands[NUM_NL80211_BANDS]; /* Lets us get back the wiphy on the callback */ void (*reg_notifier)(struct wiphy *wiphy, @@ -3288,6 +3399,8 @@ struct wiphy { const struct ieee80211_ht_cap *ht_capa_mod_mask; const struct ieee80211_vht_cap *vht_capa_mod_mask; + struct list_head wdev_list; + /* the network namespace this phy lives in currently */ possible_net_t _net; @@ -3306,6 +3419,8 @@ struct wiphy { u8 max_num_csa_counters; u8 max_adj_channel_rssi_comp; + u32 bss_select_support; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -3598,7 +3713,7 @@ static inline void *wdev_priv(struct wireless_dev *wdev) * @band: band, necessary due to channel number overlap * Return: The corresponding frequency (in MHz), or 0 if the conversion failed. */ -int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band); +int ieee80211_channel_to_frequency(int chan, enum nl80211_band band); /** * ieee80211_frequency_to_channel - convert frequency to channel number @@ -3851,7 +3966,7 @@ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI - * @oui_type: vendor-specific OUI type + * @oui_type: vendor-specific OUI type (must be < 0xff), negative means any * @ies: data consisting of IEs * @len: length of data * @@ -3863,7 +3978,7 @@ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); * Note: There are no checks on the element length other than having to fit into * the given data. */ -const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, +const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len); /** @@ -3984,10 +4099,10 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * cfg80211_scan_done - notify that scan finished * * @request: the corresponding scan request - * @aborted: set to true if the scan was aborted for any reason, - * userspace will be notified of that + * @info: information about the completed scan */ -void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted); +void cfg80211_scan_done(struct cfg80211_scan_request *request, + struct cfg80211_scan_info *info); /** * cfg80211_sched_scan_results - notify that new scan results are available @@ -4610,6 +4725,32 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) #endif /** + * cfg80211_connect_bss - notify cfg80211 of connection result + * + * @dev: network device + * @bssid: the BSSID of the AP + * @bss: entry of bss to which STA got connected to, can be obtained + * through cfg80211_get_bss (may be %NULL) + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @resp_ie: association response IEs (may be %NULL) + * @resp_ie_len: assoc response IEs length + * @status: status code, 0 for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has + * succeeded. This is similar to cfg80211_connect_result(), but with the + * option of identifying the exact bss entry for the connection. Only one of + * these functions should be called. + */ +void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, + size_t req_ie_len, const u8 *resp_ie, + size_t resp_ie_len, int status, gfp_t gfp); + +/** * cfg80211_connect_result - notify cfg80211 of connection result * * @dev: network device @@ -4626,10 +4767,38 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * It should be called by the underlying driver whenever connect() has * succeeded. */ -void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, gfp_t gfp); +static inline void +cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, resp_ie, + resp_ie_len, status, gfp); +} + +/** + * cfg80211_connect_timeout - notify cfg80211 of connection timeout + * + * @dev: network device + * @bssid: the BSSID of the AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has failed + * in a sequence where no explicit authentication/association rejection was + * received from the AP. This could happen, e.g., due to not being able to send + * out the Authentication or Association Request frame or timing out while + * waiting for the response. + */ +static inline void +cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, gfp_t gfp) +{ + cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, + gfp); +} /** * cfg80211_roamed - notify cfg80211 of roaming @@ -5029,7 +5198,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, * Returns %true if the conversion was successful, %false otherwise. */ bool ieee80211_operating_class_to_band(u8 operating_class, - enum ieee80211_band *band); + enum nl80211_band *band); /** * ieee80211_chandef_to_operating_class - convert chandef to operation class diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 171cd76558fb..795ca4008f72 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -219,9 +219,22 @@ struct wpan_phy { struct device dev; + /* the network namespace this phy lives in currently */ + possible_net_t _net; + char priv[0] __aligned(NETDEV_ALIGN); }; +static inline struct net *wpan_phy_net(struct wpan_phy *wpan_phy) +{ + return read_pnet(&wpan_phy->_net); +} + +static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) +{ + write_pnet(&wpan_phy->_net, net); +} + struct ieee802154_addr { u8 mode; __le16 pan_id; diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index c0a92e2c286d..74c9693d4941 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -17,6 +17,7 @@ #include <linux/hardirq.h> #include <linux/rcupdate.h> #include <net/sock.h> +#include <net/inet_sock.h> #ifdef CONFIG_CGROUP_NET_CLASSID struct cgroup_cls_state { @@ -63,11 +64,13 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { + struct sock *sk = skb_to_full_sk(skb); + /* If there is an sock_cgroup_classid we'll use that. */ - if (!skb->sk) + if (!sk || !sk_fullsock(sk)) return 0; - classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); + classid = sock_cgroup_classid(&sk->sk_cgrp_data); } return classid; diff --git a/include/net/codel.h b/include/net/codel.h index d168aca115cc..a6e428f80135 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -87,27 +87,6 @@ static inline codel_time_t codel_get_time(void) ((s32)((a) - (b)) >= 0)) #define codel_time_before_eq(a, b) codel_time_after_eq(b, a) -/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ -struct codel_skb_cb { - codel_time_t enqueue_time; -}; - -static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) -{ - qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb)); - return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data; -} - -static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb) -{ - return get_codel_cb(skb)->enqueue_time; -} - -static void codel_set_enqueue_time(struct sk_buff *skb) -{ - get_codel_cb(skb)->enqueue_time = codel_get_time(); -} - static inline u32 codel_time_to_us(codel_time_t val) { u64 valns = ((u64)val << CODEL_SHIFT); @@ -176,198 +155,10 @@ struct codel_stats { #define CODEL_DISABLED_THRESHOLD INT_MAX -static void codel_params_init(struct codel_params *params, - const struct Qdisc *sch) -{ - params->interval = MS2TIME(100); - params->target = MS2TIME(5); - params->mtu = psched_mtu(qdisc_dev(sch)); - params->ce_threshold = CODEL_DISABLED_THRESHOLD; - params->ecn = false; -} - -static void codel_vars_init(struct codel_vars *vars) -{ - memset(vars, 0, sizeof(*vars)); -} - -static void codel_stats_init(struct codel_stats *stats) -{ - stats->maxpacket = 0; -} - -/* - * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots - * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) - * - * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 - */ -static void codel_Newton_step(struct codel_vars *vars) -{ - u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; - u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); - - val >>= 2; /* avoid overflow in following multiply */ - val = (val * invsqrt) >> (32 - 2 + 1); - - vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; -} - -/* - * CoDel control_law is t + interval/sqrt(count) - * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid - * both sqrt() and divide operation. - */ -static codel_time_t codel_control_law(codel_time_t t, - codel_time_t interval, - u32 rec_inv_sqrt) -{ - return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); -} - -static bool codel_should_drop(const struct sk_buff *skb, - struct Qdisc *sch, - struct codel_vars *vars, - struct codel_params *params, - struct codel_stats *stats, - codel_time_t now) -{ - bool ok_to_drop; - - if (!skb) { - vars->first_above_time = 0; - return false; - } - - vars->ldelay = now - codel_get_enqueue_time(skb); - sch->qstats.backlog -= qdisc_pkt_len(skb); - - if (unlikely(qdisc_pkt_len(skb) > stats->maxpacket)) - stats->maxpacket = qdisc_pkt_len(skb); - - if (codel_time_before(vars->ldelay, params->target) || - sch->qstats.backlog <= params->mtu) { - /* went below - stay below for at least interval */ - vars->first_above_time = 0; - return false; - } - ok_to_drop = false; - if (vars->first_above_time == 0) { - /* just went above from below. If we stay above - * for at least interval we'll say it's ok to drop - */ - vars->first_above_time = now + params->interval; - } else if (codel_time_after(now, vars->first_above_time)) { - ok_to_drop = true; - } - return ok_to_drop; -} - +typedef u32 (*codel_skb_len_t)(const struct sk_buff *skb); +typedef codel_time_t (*codel_skb_time_t)(const struct sk_buff *skb); +typedef void (*codel_skb_drop_t)(struct sk_buff *skb, void *ctx); typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars, - struct Qdisc *sch); + void *ctx); -static struct sk_buff *codel_dequeue(struct Qdisc *sch, - struct codel_params *params, - struct codel_vars *vars, - struct codel_stats *stats, - codel_skb_dequeue_t dequeue_func) -{ - struct sk_buff *skb = dequeue_func(vars, sch); - codel_time_t now; - bool drop; - - if (!skb) { - vars->dropping = false; - return skb; - } - now = codel_get_time(); - drop = codel_should_drop(skb, sch, vars, params, stats, now); - if (vars->dropping) { - if (!drop) { - /* sojourn time below target - leave dropping state */ - vars->dropping = false; - } else if (codel_time_after_eq(now, vars->drop_next)) { - /* It's time for the next drop. Drop the current - * packet and dequeue the next. The dequeue might - * take us out of dropping state. - * If not, schedule the next drop. - * A large backlog might result in drop rates so high - * that the next drop should happen now, - * hence the while loop. - */ - while (vars->dropping && - codel_time_after_eq(now, vars->drop_next)) { - vars->count++; /* dont care of possible wrap - * since there is no more divide - */ - codel_Newton_step(vars); - if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - vars->drop_next = - codel_control_law(vars->drop_next, - params->interval, - vars->rec_inv_sqrt); - goto end; - } - stats->drop_len += qdisc_pkt_len(skb); - qdisc_drop(skb, sch); - stats->drop_count++; - skb = dequeue_func(vars, sch); - if (!codel_should_drop(skb, sch, - vars, params, stats, now)) { - /* leave dropping state */ - vars->dropping = false; - } else { - /* and schedule the next drop */ - vars->drop_next = - codel_control_law(vars->drop_next, - params->interval, - vars->rec_inv_sqrt); - } - } - } - } else if (drop) { - u32 delta; - - if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - } else { - stats->drop_len += qdisc_pkt_len(skb); - qdisc_drop(skb, sch); - stats->drop_count++; - - skb = dequeue_func(vars, sch); - drop = codel_should_drop(skb, sch, vars, params, - stats, now); - } - vars->dropping = true; - /* if min went above target close to when we last went below it - * assume that the drop rate that controlled the queue on the - * last cycle is a good starting point to control it now. - */ - delta = vars->count - vars->lastcount; - if (delta > 1 && - codel_time_before(now - vars->drop_next, - 16 * params->interval)) { - vars->count = delta; - /* we dont care if rec_inv_sqrt approximation - * is not very precise : - * Next Newton steps will correct it quadratically. - */ - codel_Newton_step(vars); - } else { - vars->count = 1; - vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; - } - vars->lastcount = vars->count; - vars->drop_next = codel_control_law(now, params->interval, - vars->rec_inv_sqrt); - } -end: - if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && - INET_ECN_set_ce(skb)) - stats->ce_mark++; - return skb; -} #endif diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h new file mode 100644 index 000000000000..d289b91dcd65 --- /dev/null +++ b/include/net/codel_impl.h @@ -0,0 +1,255 @@ +#ifndef __NET_SCHED_CODEL_IMPL_H +#define __NET_SCHED_CODEL_IMPL_H + +/* + * Codel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* Controlling Queue Delay (CoDel) algorithm + * ========================================= + * Source : Kathleen Nichols and Van Jacobson + * http://queue.acm.org/detail.cfm?id=2209336 + * + * Implemented on linux by Dave Taht and Eric Dumazet + */ + +static void codel_params_init(struct codel_params *params) +{ + params->interval = MS2TIME(100); + params->target = MS2TIME(5); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; + params->ecn = false; +} + +static void codel_vars_init(struct codel_vars *vars) +{ + memset(vars, 0, sizeof(*vars)); +} + +static void codel_stats_init(struct codel_stats *stats) +{ + stats->maxpacket = 0; +} + +/* + * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ +static void codel_Newton_step(struct codel_vars *vars) +{ + u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; + u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); + + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); + + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; +} + +/* + * CoDel control_law is t + interval/sqrt(count) + * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid + * both sqrt() and divide operation. + */ +static codel_time_t codel_control_law(codel_time_t t, + codel_time_t interval, + u32 rec_inv_sqrt) +{ + return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); +} + +static bool codel_should_drop(const struct sk_buff *skb, + void *ctx, + struct codel_vars *vars, + struct codel_params *params, + struct codel_stats *stats, + codel_skb_len_t skb_len_func, + codel_skb_time_t skb_time_func, + u32 *backlog, + codel_time_t now) +{ + bool ok_to_drop; + u32 skb_len; + + if (!skb) { + vars->first_above_time = 0; + return false; + } + + skb_len = skb_len_func(skb); + vars->ldelay = now - skb_time_func(skb); + + if (unlikely(skb_len > stats->maxpacket)) + stats->maxpacket = skb_len; + + if (codel_time_before(vars->ldelay, params->target) || + *backlog <= params->mtu) { + /* went below - stay below for at least interval */ + vars->first_above_time = 0; + return false; + } + ok_to_drop = false; + if (vars->first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + vars->first_above_time = now + params->interval; + } else if (codel_time_after(now, vars->first_above_time)) { + ok_to_drop = true; + } + return ok_to_drop; +} + +static struct sk_buff *codel_dequeue(void *ctx, + u32 *backlog, + struct codel_params *params, + struct codel_vars *vars, + struct codel_stats *stats, + codel_skb_len_t skb_len_func, + codel_skb_time_t skb_time_func, + codel_skb_drop_t drop_func, + codel_skb_dequeue_t dequeue_func) +{ + struct sk_buff *skb = dequeue_func(vars, ctx); + codel_time_t now; + bool drop; + + if (!skb) { + vars->dropping = false; + return skb; + } + now = codel_get_time(); + drop = codel_should_drop(skb, ctx, vars, params, stats, + skb_len_func, skb_time_func, backlog, now); + if (vars->dropping) { + if (!drop) { + /* sojourn time below target - leave dropping state */ + vars->dropping = false; + } else if (codel_time_after_eq(now, vars->drop_next)) { + /* It's time for the next drop. Drop the current + * packet and dequeue the next. The dequeue might + * take us out of dropping state. + * If not, schedule the next drop. + * A large backlog might result in drop rates so high + * that the next drop should happen now, + * hence the while loop. + */ + while (vars->dropping && + codel_time_after_eq(now, vars->drop_next)) { + vars->count++; /* dont care of possible wrap + * since there is no more divide + */ + codel_Newton_step(vars); + if (params->ecn && INET_ECN_set_ce(skb)) { + stats->ecn_mark++; + vars->drop_next = + codel_control_law(vars->drop_next, + params->interval, + vars->rec_inv_sqrt); + goto end; + } + stats->drop_len += skb_len_func(skb); + drop_func(skb, ctx); + stats->drop_count++; + skb = dequeue_func(vars, ctx); + if (!codel_should_drop(skb, ctx, + vars, params, stats, + skb_len_func, + skb_time_func, + backlog, now)) { + /* leave dropping state */ + vars->dropping = false; + } else { + /* and schedule the next drop */ + vars->drop_next = + codel_control_law(vars->drop_next, + params->interval, + vars->rec_inv_sqrt); + } + } + } + } else if (drop) { + u32 delta; + + if (params->ecn && INET_ECN_set_ce(skb)) { + stats->ecn_mark++; + } else { + stats->drop_len += skb_len_func(skb); + drop_func(skb, ctx); + stats->drop_count++; + + skb = dequeue_func(vars, ctx); + drop = codel_should_drop(skb, ctx, vars, params, + stats, skb_len_func, + skb_time_func, backlog, now); + } + vars->dropping = true; + /* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. + */ + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, + 16 * params->interval)) { + vars->count = delta; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(vars); + } else { + vars->count = 1; + vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; + } + vars->lastcount = vars->count; + vars->drop_next = codel_control_law(now, params->interval, + vars->rec_inv_sqrt); + } +end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; + return skb; +} + +#endif diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h new file mode 100644 index 000000000000..098630f83a55 --- /dev/null +++ b/include/net/codel_qdisc.h @@ -0,0 +1,74 @@ +#ifndef __NET_SCHED_CODEL_QDISC_H +#define __NET_SCHED_CODEL_QDISC_H + +/* + * Codel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* Controlling Queue Delay (CoDel) algorithm + * ========================================= + * Source : Kathleen Nichols and Van Jacobson + * http://queue.acm.org/detail.cfm?id=2209336 + * + * Implemented on linux by Dave Taht and Eric Dumazet + */ + +/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ +struct codel_skb_cb { + codel_time_t enqueue_time; + unsigned int mem_usage; +}; + +static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb)); + return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb) +{ + return get_codel_cb(skb)->enqueue_time; +} + +static void codel_set_enqueue_time(struct sk_buff *skb) +{ + get_codel_cb(skb)->enqueue_time = codel_get_time(); +} + +#endif diff --git a/include/net/compat.h b/include/net/compat.h index 48103cf94e97..13de0ccaa059 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,6 +42,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, unsigned int); asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, diff --git a/include/net/devlink.h b/include/net/devlink.h index c37d257891d6..c99ffe8cef3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -24,6 +24,7 @@ struct devlink_ops; struct devlink { struct list_head list; struct list_head port_list; + struct list_head sb_list; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; @@ -42,6 +43,12 @@ struct devlink_port { u32 split_group; }; +struct devlink_sb_pool_info { + enum devlink_sb_pool_type pool_type; + u32 size; + enum devlink_sb_threshold_type threshold_type; +}; + struct devlink_ops { size_t priv_size; int (*port_type_set)(struct devlink_port *devlink_port, @@ -49,6 +56,43 @@ struct devlink_ops { int (*port_split)(struct devlink *devlink, unsigned int port_index, unsigned int count); int (*port_unsplit)(struct devlink *devlink, unsigned int port_index); + int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + int (*sb_port_pool_get)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); + int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); + int (*sb_occ_snapshot)(struct devlink *devlink, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct devlink *devlink, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct devlink_port *devlink_port, + unsigned int sb_index, + u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) @@ -82,6 +126,11 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_split_set(struct devlink_port *devlink_port, u32 split_group); +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count); +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); #else @@ -135,6 +184,21 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port, { } +static inline int devlink_sb_register(struct devlink *devlink, + unsigned int sb_index, u32 size, + u16 ingress_pools_count, + u16 egress_pools_count, + u16 ingress_tc_count, + u16 egress_tc_count) +{ + return 0; +} + +static inline void devlink_sb_unregister(struct devlink *devlink, + unsigned int sb_index) +{ +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 6463bb2863ac..2217a3f817f8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -16,7 +16,6 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/phy.h> #include <linux/phy_fixed.h> #include <linux/ethtool.h> @@ -27,11 +26,14 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_LAST, /* MUST BE LAST */ }; #define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 +#define DSA_RTABLE_NONE -1 + struct dsa_chip_data { /* * How to access the switch configuration registers. @@ -59,19 +61,11 @@ struct dsa_chip_data { struct device_node *port_dn[DSA_MAX_PORTS]; /* - * An array (with nr_chips elements) of which element [a] - * indicates which port on this switch should be used to - * send packets to that are destined for switch a. Can be - * NULL if there is only one switch chip. - */ - s8 *rtable; - - /* - * A switch may have a GPIO line tied to its reset pin. Parse - * this from the device tree, and use it before performing - * switch soft reset. + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. */ - struct gpio_desc *reset; + s8 rtable[DSA_MAX_SWITCHES]; }; struct dsa_platform_data { @@ -93,6 +87,17 @@ struct dsa_platform_data { struct packet_type; struct dsa_switch_tree { + struct list_head list; + + /* Tree identifier */ + u32 tree; + + /* Number of switches attached to this tree */ + struct kref refcount; + + /* Has this tree been applied to the hardware? */ + bool applied; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. @@ -108,7 +113,12 @@ struct dsa_switch_tree { struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); - enum dsa_tag_protocol tag_protocol; + + /* + * Original copy of the master netdev ethtool_ops + */ + struct ethtool_ops master_ethtool_ops; + const struct ethtool_ops *master_orig_ethtool_ops; /* * The switch and port to which the CPU is attached. @@ -120,9 +130,23 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; + + /* + * Tagging protocol operations for adding and removing an + * encapsulation tag. + */ + const struct dsa_device_ops *tag_ops; +}; + +struct dsa_port { + struct net_device *netdev; + struct device_node *dn; + unsigned int ageing_time; }; struct dsa_switch { + struct device *dev; + /* * Parent switch tree, and switch index. */ @@ -130,14 +154,15 @@ struct dsa_switch { int index; /* - * Tagging protocol understood by this switch + * Give the switch driver somewhere to hang its private data + * structure. */ - enum dsa_tag_protocol tag_protocol; + void *priv; /* * Configuration data for this switch. */ - struct dsa_chip_data *pd; + struct dsa_chip_data *cd; /* * The used switch driver. @@ -145,9 +170,11 @@ struct dsa_switch { struct dsa_switch_driver *drv; /* - * Reference to host device to use. + * An array of which element [a] indicates which port on this + * switch should be used to send packets to that are destined + * for switch a. Can be NULL if there is only one switch chip. */ - struct device *master_dev; + s8 rtable[DSA_MAX_SWITCHES]; #ifdef CONFIG_NET_DSA_HWMON /* @@ -158,13 +185,19 @@ struct dsa_switch { #endif /* + * The lower device this switch uses to talk to the host + */ + struct net_device *master_netdev; + + /* * Slave mii_bus and devices for the individual ports. */ u32 dsa_port_mask; - u32 phys_port_mask; + u32 cpu_port_mask; + u32 enabled_port_mask; u32 phys_mii_mask; + struct dsa_port ports[DSA_MAX_PORTS]; struct mii_bus *slave_mii_bus; - struct net_device *ports[DSA_MAX_PORTS]; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) @@ -179,7 +212,7 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { - return ds->phys_port_mask & (1 << p) && ds->ports[p]; + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; } static inline u8 dsa_upstream_port(struct dsa_switch *ds) @@ -195,7 +228,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) if (dst->cpu_switch == ds->index) return dst->cpu_port; else - return ds->pd->rtable[dst->cpu_switch]; + return ds->rtable[dst->cpu_switch]; } struct switchdev_trans; @@ -207,12 +240,13 @@ struct dsa_switch_driver { struct list_head list; enum dsa_tag_protocol tag_protocol; - int priv_size; /* * Probing and setup. */ - char *(*probe)(struct device *host_dev, int sw_addr); + const char *(*probe)(struct device *dsa_dev, + struct device *host_dev, int sw_addr, + void **priv); int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); @@ -296,11 +330,12 @@ struct dsa_switch_driver { /* * Bridge integration */ + int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port); - int (*port_stp_update)(struct dsa_switch *ds, int port, - u8 state); + void (*port_stp_state_set)(struct dsa_switch *ds, int port, + u8 state); /* * VLAN support @@ -310,7 +345,7 @@ struct dsa_switch_driver { int (*port_vlan_prepare)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans); - int (*port_vlan_add)(struct dsa_switch *ds, int port, + void (*port_vlan_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans); int (*port_vlan_del)(struct dsa_switch *ds, int port, @@ -325,7 +360,7 @@ struct dsa_switch_driver { int (*port_fdb_prepare)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans); - int (*port_fdb_add)(struct dsa_switch *ds, int port, + void (*port_fdb_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans); int (*port_fdb_del)(struct dsa_switch *ds, int port, @@ -341,11 +376,14 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline void *ds_to_priv(struct dsa_switch *ds) { - return (void *)(ds + 1); + return ds->priv; } static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; } + +void dsa_unregister_switch(struct dsa_switch *ds); +int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); #endif diff --git a/include/net/dst.h b/include/net/dst.h index 5c98443c1c9e..6835d224d47b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -85,12 +85,11 @@ struct dst_entry { #endif #ifdef CONFIG_64BIT - struct lwtunnel_state *lwtstate; /* * Align __refcnt to a 64 bytes alignment * (L1_CACHE_SIZE would be too much) */ - long __pad_to_align_refcnt[1]; + long __pad_to_align_refcnt[2]; #endif /* * __refcnt wants to be on a different cache line from @@ -99,9 +98,7 @@ struct dst_entry { atomic_t __refcnt; /* client references */ int __use; unsigned long lastuse; -#ifndef CONFIG_64BIT struct lwtunnel_state *lwtstate; -#endif union { struct dst_entry *next; struct rtable __rcu *rt_next; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 59160de702b6..456e4a6006ab 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -17,7 +17,8 @@ struct fib_rule { u32 flags; u32 table; u8 action; - /* 3 bytes hole, try to use */ + u8 l3mdev; + /* 2 bytes hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -36,6 +37,7 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 @@ -89,7 +91,8 @@ struct fib_rules_ops { [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_L3MDEV] = { .type = NLA_U8 } static inline void fib_rule_get(struct fib_rule *rule) { @@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule) kfree_rcu(rule, rcu); } +#ifdef CONFIG_NET_L3_MASTER_DEV +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->l3mdev ? arg->table : rule->table; +} +#else +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->table; +} +#endif + static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) @@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); #endif diff --git a/include/net/fou.h b/include/net/fou.h index 19b8a0c62a98..f5cc6910a27e 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -9,11 +9,11 @@ #include <net/udp.h> size_t fou_encap_hlen(struct ip_tunnel_encap *e); -static size_t gue_encap_hlen(struct ip_tunnel_encap *e); +size_t gue_encap_hlen(struct ip_tunnel_encap *e); -int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); +int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type); +int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type); #endif diff --git a/include/net/fq.h b/include/net/fq.h new file mode 100644 index 000000000000..268b49049c37 --- /dev/null +++ b/include/net/fq.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_H +#define __NET_SCHED_FQ_H + +struct fq_tin; + +/** + * struct fq_flow - per traffic flow queue + * + * @tin: owner of this flow. Used to manage collisions, i.e. when a packet + * hashes to an index which points to a flow that is already owned by a + * different tin the packet is destined to. In such case the implementer + * must provide a fallback flow + * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ + * (deficit round robin) based round robin queuing similar to the one + * found in net/sched/sch_fq_codel.c + * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of + * fat flows and efficient head-dropping if packet limit is reached + * @queue: sk_buff queue to hold packets + * @backlog: number of bytes pending in the queue. The number of packets can be + * found in @queue.qlen + * @deficit: used for DRR++ + */ +struct fq_flow { + struct fq_tin *tin; + struct list_head flowchain; + struct list_head backlogchain; + struct sk_buff_head queue; + u32 backlog; + int deficit; +}; + +/** + * struct fq_tin - a logical container of fq_flows + * + * Used to group fq_flows into a logical aggregate. DRR++ scheme is used to + * pull interleaved packets out of the associated flows. + * + * @new_flows: linked list of fq_flow + * @old_flows: linked list of fq_flow + */ +struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; + u32 backlog_bytes; + u32 backlog_packets; + u32 overlimit; + u32 collisions; + u32 flows; + u32 tx_bytes; + u32 tx_packets; +}; + +/** + * struct fq - main container for fair queuing purposes + * + * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient + * head-dropping when @backlog reaches @limit + * @limit: max number of packets that can be queued across all flows + * @backlog: number of packets queued across all flows + */ +struct fq { + struct fq_flow *flows; + struct list_head backlogs; + spinlock_t lock; + u32 flows_cnt; + u32 perturbation; + u32 limit; + u32 quantum; + u32 backlog; + u32 overlimit; + u32 collisions; +}; + +typedef struct sk_buff *fq_tin_dequeue_t(struct fq *, + struct fq_tin *, + struct fq_flow *flow); + +typedef void fq_skb_free_t(struct fq *, + struct fq_tin *, + struct fq_flow *, + struct sk_buff *); + +typedef struct fq_flow *fq_flow_get_default_t(struct fq *, + struct fq_tin *, + int idx, + struct sk_buff *); + +#endif diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h new file mode 100644 index 000000000000..163f3ed0f05a --- /dev/null +++ b/include/net/fq_impl.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2016 Qualcomm Atheros, Inc + * + * GPL v2 + * + * Based on net/sched/sch_fq_codel.c + */ +#ifndef __NET_SCHED_FQ_IMPL_H +#define __NET_SCHED_FQ_IMPL_H + +#include <net/fq.h> + +/* functions that are embedded into includer */ + +static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) +{ + struct fq_tin *tin = flow->tin; + struct fq_flow *i; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + + skb = __skb_dequeue(&flow->queue); + if (!skb) + return NULL; + + tin->backlog_bytes -= skb->len; + tin->backlog_packets--; + flow->backlog -= skb->len; + fq->backlog--; + + if (flow->backlog == 0) { + list_del_init(&flow->backlogchain); + } else { + i = flow; + + list_for_each_entry_continue(i, &fq->backlogs, backlogchain) + if (i->backlog < flow->backlog) + break; + + list_move_tail(&flow->backlogchain, + &i->backlogchain); + } + + return skb; +} + +static struct sk_buff *fq_tin_dequeue(struct fq *fq, + struct fq_tin *tin, + fq_tin_dequeue_t dequeue_func) +{ + struct fq_flow *flow; + struct list_head *head; + struct sk_buff *skb; + + lockdep_assert_held(&fq->lock); + +begin: + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + return NULL; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + + if (flow->deficit <= 0) { + flow->deficit += fq->quantum; + list_move_tail(&flow->flowchain, + &tin->old_flows); + goto begin; + } + + skb = dequeue_func(fq, tin, flow); + if (!skb) { + /* force a pass through old_flows to prevent starvation */ + if ((head == &tin->new_flows) && + !list_empty(&tin->old_flows)) { + list_move_tail(&flow->flowchain, &tin->old_flows); + } else { + list_del_init(&flow->flowchain); + flow->tin = NULL; + } + goto begin; + } + + flow->deficit -= skb->len; + tin->tx_bytes += skb->len; + tin->tx_packets++; + + return skb; +} + +static struct fq_flow *fq_flow_classify(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + u32 hash; + u32 idx; + + lockdep_assert_held(&fq->lock); + + hash = skb_get_hash_perturb(skb, fq->perturbation); + idx = reciprocal_scale(hash, fq->flows_cnt); + flow = &fq->flows[idx]; + + if (flow->tin && flow->tin != tin) { + flow = get_default_func(fq, tin, idx, skb); + tin->collisions++; + fq->collisions++; + } + + if (!flow->tin) + tin->flows++; + + return flow; +} + +static void fq_recalc_backlog(struct fq *fq, + struct fq_tin *tin, + struct fq_flow *flow) +{ + struct fq_flow *i; + + if (list_empty(&flow->backlogchain)) + list_add_tail(&flow->backlogchain, &fq->backlogs); + + i = flow; + list_for_each_entry_continue_reverse(i, &fq->backlogs, + backlogchain) + if (i->backlog > flow->backlog) + break; + + list_move(&flow->backlogchain, &i->backlogchain); +} + +static void fq_tin_enqueue(struct fq *fq, + struct fq_tin *tin, + struct sk_buff *skb, + fq_skb_free_t free_func, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + flow = fq_flow_classify(fq, tin, skb, get_default_func); + + flow->tin = tin; + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; + tin->backlog_packets++; + fq->backlog++; + + fq_recalc_backlog(fq, tin, flow); + + if (list_empty(&flow->flowchain)) { + flow->deficit = fq->quantum; + list_add_tail(&flow->flowchain, + &tin->new_flows); + } + + __skb_queue_tail(&flow->queue, skb); + + if (fq->backlog > fq->limit) { + flow = list_first_entry_or_null(&fq->backlogs, + struct fq_flow, + backlogchain); + if (!flow) + return; + + skb = fq_flow_dequeue(fq, flow); + if (!skb) + return; + + free_func(fq, flow->tin, flow, skb); + + flow->tin->overlimit++; + fq->overlimit++; + } +} + +static void fq_flow_reset(struct fq *fq, + struct fq_flow *flow, + fq_skb_free_t free_func) +{ + struct sk_buff *skb; + + while ((skb = fq_flow_dequeue(fq, flow))) + free_func(fq, flow->tin, flow, skb); + + if (!list_empty(&flow->flowchain)) + list_del_init(&flow->flowchain); + + if (!list_empty(&flow->backlogchain)) + list_del_init(&flow->backlogchain); + + flow->tin = NULL; + + WARN_ON_ONCE(flow->backlog); +} + +static void fq_tin_reset(struct fq *fq, + struct fq_tin *tin, + fq_skb_free_t free_func) +{ + struct list_head *head; + struct fq_flow *flow; + + for (;;) { + head = &tin->new_flows; + if (list_empty(head)) { + head = &tin->old_flows; + if (list_empty(head)) + break; + } + + flow = list_first_entry(head, struct fq_flow, flowchain); + fq_flow_reset(fq, flow, free_func); + } + + WARN_ON_ONCE(tin->backlog_bytes); + WARN_ON_ONCE(tin->backlog_packets); +} + +static void fq_flow_init(struct fq_flow *flow) +{ + INIT_LIST_HEAD(&flow->flowchain); + INIT_LIST_HEAD(&flow->backlogchain); + __skb_queue_head_init(&flow->queue); +} + +static void fq_tin_init(struct fq_tin *tin) +{ + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); +} + +static int fq_init(struct fq *fq, int flows_cnt) +{ + int i; + + memset(fq, 0, sizeof(fq[0])); + INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); + fq->flows_cnt = max_t(u32, flows_cnt, 1); + fq->perturbation = prandom_u32(); + fq->quantum = 300; + fq->limit = 8192; + + fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + if (!fq->flows) + return -ENOMEM; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_init(&fq->flows[i]); + + return 0; +} + +static void fq_reset(struct fq *fq, + fq_skb_free_t free_func) +{ + int i; + + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_reset(fq, &fq->flows[i], free_func); + + kfree(fq->flows); + fq->flows = NULL; +} + +#endif diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index cbafa3768d48..231e121cc7d9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -19,22 +19,26 @@ struct gnet_dump { /* Backward compatibility */ int compat_tc_stats; int compat_xstats; + int padattr; void * xstats; int xstats_len; struct tc_stats tc_stats; }; int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d); + struct gnet_dump *d, int padattr); int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, int xstats_type, - spinlock_t *lock, struct gnet_dump *d); + spinlock_t *lock, struct gnet_dump *d, + int padattr); -int gnet_stats_copy_basic(struct gnet_dump *d, +int gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats, +void __gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, @@ -50,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est64 *rate_est); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est64 *rate_est); #endif diff --git a/include/net/geneve.h b/include/net/geneve.h index e6c23dc765f7..ec0327d4331b 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -1,10 +1,7 @@ #ifndef __NET_GENEVE_H #define __NET_GENEVE_H 1 -#ifdef CONFIG_INET #include <net/udp_tunnel.h> -#endif - /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -62,14 +59,6 @@ struct genevehdr { struct geneve_opt options[]; }; -#if IS_ENABLED(CONFIG_GENEVE) -void geneve_get_rx_port(struct net_device *netdev); -#else -static inline void geneve_get_rx_port(struct net_device *netdev) -{ -} -#endif - #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); diff --git a/include/net/gre.h b/include/net/gre.h index 97eafdc47eea..7a54a31d1d4c 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -25,4 +25,108 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); +int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, __be16 proto, int nhs); + +static inline int gre_calc_hlen(__be16 o_flags) +{ + int addend = 4; + + if (o_flags & TUNNEL_CSUM) + addend += 4; + if (o_flags & TUNNEL_KEY) + addend += 4; + if (o_flags & TUNNEL_SEQ) + addend += 4; + return addend; +} + +static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + +static inline __sum16 gre_checksum(struct sk_buff *skb) +{ + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, 0, skb->len, 0); + return csum_fold(csum); +} + +static inline void gre_build_header(struct sk_buff *skb, int hdr_len, + __be16 flags, __be16 proto, + __be32 key, __be32 seq) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + skb_reset_transport_header(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = gre_tnl_flags_to_gre_flags(flags); + greh->protocol = proto; + + if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (flags & TUNNEL_SEQ) { + *ptr = seq; + ptr--; + } + if (flags & TUNNEL_KEY) { + *ptr = key; + ptr--; + } + if (flags & TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { + *ptr = 0; + *(__sum16 *)ptr = gre_checksum(skb); + } + } +} + #endif diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index cf6c74550baa..d15214d673b2 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -14,27 +14,26 @@ struct gro_cells { struct gro_cell __percpu *cells; }; -static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) +static inline int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct gro_cell *cell; struct net_device *dev = skb->dev; - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { - netif_rx(skb); - return; - } + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + return netif_rx(skb); cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return; + return NET_RX_DROP; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + return NET_RX_SUCCESS; } /* called under BH context */ diff --git a/include/net/gtp.h b/include/net/gtp.h new file mode 100644 index 000000000000..6398891b99ba --- /dev/null +++ b/include/net/gtp.h @@ -0,0 +1,34 @@ +#ifndef _GTP_H_ +#define _GTP_H_ + +/* General GTP protocol related definitions. */ + +#define GTP0_PORT 3386 +#define GTP1U_PORT 2152 + +#define GTP_TPDU 255 + +struct gtp0_header { /* According to GSM TS 09.60. */ + __u8 flags; + __u8 type; + __be16 length; + __be16 seq; + __be16 flow; + __u8 number; + __u8 spare[3]; + __be64 tid; +} __attribute__ ((packed)); + +struct gtp1_header { /* According to 3GPP TS 29.060. */ + __u8 flags; + __u8 type; + __be16 length; + __be32 tid; +} __attribute__ ((packed)); + +#define GTP1_F_NPDU 0x01 +#define GTP1_F_SEQ 0x02 +#define GTP1_F_EXTHDR 0x04 +#define GTP1_F_MASK 0x07 + +#endif diff --git a/include/net/icmp.h b/include/net/icmp.h index 970028e13382..3ef2743a8eec 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -30,9 +30,9 @@ struct icmp_err { extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define ICMP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) +#define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) -#define ICMPMSGIN_INC_STATS_BH(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) +#define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) struct dst_entry; struct net_proto_family; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 28332bdac333..b87becacd9d3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, + bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); + *refcounted = true; if (sk) return sk; - + *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif); } @@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif) + int iif, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); + *refcounted = true; if (sk) return sk; return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif); + iif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 109e3ee9108c..5d683428fced 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -39,6 +39,11 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); +struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb); +int inet_gro_complete(struct sk_buff *skb, int nhoff); +struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features); + static inline void inet_ctl_sock_destroy(struct sock *sk) { if (sk) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 50f635c2c536..0574493e3899 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -100,14 +100,10 @@ struct inet_bind_hashbucket { /* * Sockets can be hashed in established or listening table - * We must use different 'nulls' end-of-chain value for listening - * hash table, or we might find a socket that was closed and - * reallocated/inserted into established hash table */ -#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_nulls_head head; + struct hlist_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ -/* - * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. */ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, + bool *refcounted) { u16 hnum = ntohs(dport); - struct sock *sk = __inet_lookup_established(net, hashinfo, - saddr, sport, daddr, hnum, dif); + struct sock *sk; - return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sk = __inet_lookup_established(net, hashinfo, saddr, sport, + daddr, hnum, dif); + *refcounted = true; + if (sk) + return sk; + *refcounted = false; + return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + sport, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, @@ -325,12 +324,13 @@ static inline struct sock *inet_lookup(struct net *net, const int dif) { struct sock *sk; + bool refcounted; - local_bh_disable(); sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif); - local_bh_enable(); + dport, dif, &refcounted); + if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; return sk; } @@ -338,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, - const __be16 dport) + const __be16 dport, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); const struct iphdr *iph = ip_hdr(skb); + *refcounted = true; if (sk) return sk; - else - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, - doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb)); + + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + refcounted); } u32 sk_ehashfn(const struct sock *sk); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 012b1f91f3ec..236a81034fef 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -97,7 +97,12 @@ struct inet_request_sock { u32 ir_mark; union { struct ip_options_rcu *opt; - struct sk_buff *pktopts; +#if IS_ENABLED(CONFIG_IPV6) + struct { + struct ipv6_txoptions *ipv6_opt; + struct sk_buff *pktopts; + }; +#endif }; }; diff --git a/include/net/ip.h b/include/net/ip.h index fad74d323bd6..9742b92dc933 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -36,6 +36,7 @@ struct sock; struct inet_skb_parm { + int iif; struct ip_options opt; /* Compiled IP options */ unsigned char flags; @@ -46,6 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) +#define IPSKB_FRAG_SEGS BIT(7) u16 frag_max_size; }; @@ -56,6 +58,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb) } struct ipcm_cookie { + struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; @@ -186,17 +189,15 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, unsigned int len); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) -#define IP_INC_STATS_BH(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) +#define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) -#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_ADD_STATS(net, field, val) __SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) -#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val) +#define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) -#define NET_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.net_statistics, field) -#define NET_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->mib.net_statistics, field) +#define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) -#define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) +#define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct); unsigned long snmp_fold_field(void __percpu *mib, int offt); @@ -313,10 +314,9 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, return min(dst->dev->mtu, IP_MAX_MTU); } -static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) +static inline unsigned int ip_skb_dst_mtu(struct sock *sk, + const struct sk_buff *skb) { - struct sock *sk = skb->sk; - if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; @@ -550,7 +550,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 295d291269e2..d97305d0e71f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -18,6 +18,7 @@ struct route_info { __u8 prefix[0]; /* 0,8 or 16 */ }; +#include <net/addrconf.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> @@ -76,6 +77,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net, struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int ifindex, struct flowi6 *fl6, int flags); int ip6_route_init(void); void ip6_route_cleanup(void); @@ -86,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); -int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, - const struct in6_addr *daddr, unsigned int prefs, - struct in6_addr *saddr); +static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr) +{ + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; + int err = 0; + + if (rt && rt->rt6i_prefsrc.plen) + *saddr = rt->rt6i_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, + daddr, prefs, saddr); + + return err; +} struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); @@ -101,6 +118,9 @@ void fib6_force_start_gc(struct net *net); struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast); +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags); + /* * support functions for ND * diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 499a707765ea..43a5a0e4524c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -42,6 +42,7 @@ struct ip6_tnl { struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_cache dst_cache; /* cached dst */ + struct gro_cells gro_cells; int err_count; unsigned long err_time; @@ -49,10 +50,72 @@ struct ip6_tnl { /* These fields used only by GRE */ __u32 i_seqno; /* The last seen seqno */ __u32 o_seqno; /* The last output seqno */ - int hlen; /* Precalculated GRE header length */ + int hlen; /* tun_hlen + encap_hlen */ + int tun_hlen; /* Precalculated header length */ + int encap_hlen; /* Encap header length (FOU,GUE) */ + struct ip_tunnel_encap encap; int mlink; }; +struct ip6_tnl_encap_ops { + size_t (*encap_hlen)(struct ip_tunnel_encap *e); + int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6); +}; + +#ifdef CONFIG_INET + +extern const struct ip6_tnl_encap_ops __rcu * + ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; + +int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +int ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap); + +static inline int ip6_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct ip6_tnl_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(ip6tun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +static inline int ip6_tnl_encap(struct sk_buff *skb, struct ip6_tnl *t, + u8 *protocol, struct flowi6 *fl6) +{ + const struct ip6_tnl_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(ip6tun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl6); + rcu_read_unlock(); + + return ret; +} + /* Tunnel encapsulation limit destination sub-option */ struct ipv6_tlv_tnl_enc_lim { @@ -63,15 +126,20 @@ struct ipv6_tlv_tnl_enc_lim { int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +int ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu, __u8 proto); __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); +int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); -#ifdef CONFIG_INET static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 56050f913339..a5e7035fb93f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -105,24 +105,23 @@ struct ip_tunnel { struct net_device *dev; struct net *net; /* netns for packet i/o */ - int err_count; /* Number of arrived ICMP errors */ unsigned long err_time; /* Time when the last ICMP error * arrived */ + int err_count; /* Number of arrived ICMP errors */ /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ - int mlink; struct dst_cache dst_cache; struct ip_tunnel_parm parms; + int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ - struct ip_tunnel_encap encap; - int hlen; /* tun_hlen + encap_hlen */ + struct ip_tunnel_encap encap; /* for SIT */ #ifdef CONFIG_IPV6_SIT_6RD @@ -133,6 +132,7 @@ struct ip_tunnel { int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; + bool ignore_df; }; #define TUNNEL_CSUM __cpu_to_be16(0x01) @@ -157,10 +157,12 @@ struct tnl_ptk_info { __be16 proto; __be32 key; __be32 seq; + int hdr_len; }; #define PACKET_RCVD 0 #define PACKET_REJECT 1 +#define PACKET_NEXT 2 #define IP_TNL_HASH_BITS 7 #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) @@ -171,22 +173,6 @@ struct ip_tunnel_net { struct ip_tunnel __rcu *collect_md_tun; }; -struct ip_tunnel_encap_ops { - size_t (*encap_hlen)(struct ip_tunnel_encap *e); - int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); -}; - -#define MAX_IPTUN_ENCAP_OPS 8 - -extern const struct ip_tunnel_encap_ops __rcu * - iptun_encaps[MAX_IPTUN_ENCAP_OPS]; - -int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, - unsigned int num); -int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, - unsigned int num); - static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, @@ -251,8 +237,6 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); -int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, - u8 *protocol, struct flowi4 *fl4); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); @@ -271,9 +255,67 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); + +struct ip_tunnel_encap_ops { + size_t (*encap_hlen)(struct ip_tunnel_encap *e); + int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4); +}; + +#define MAX_IPTUN_ENCAP_OPS 8 + +extern const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS]; + +int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, + unsigned int num); +int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, + unsigned int num); + int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline int ip_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct ip_tunnel_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, + u8 *protocol, struct flowi4 *fl4) +{ + const struct ip_tunnel_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl4); + rcu_read_unlock(); + + return ret; +} + /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) @@ -295,15 +337,22 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet); +int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet); + +static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool xnet) +{ + return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); +} + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); -struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); +int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6cc576fd467..cd6018a9ee24 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -731,6 +731,12 @@ struct ip_vs_pe { u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); + /* create connections for real-server outgoing packets */ + struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, __be16 cport); }; /* The application module object (a.k.a. app incarnation) */ @@ -874,6 +880,7 @@ struct netns_ipvs { /* Service counters */ atomic_t ftpsvc_counter; atomic_t nullsvc_counter; + atomic_t conn_out_counter; #ifdef CONFIG_SYSCTL /* 1/rate drop and drop-entry variables */ @@ -1147,6 +1154,12 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) */ const char *ip_vs_proto_name(unsigned int proto); void ip_vs_init_hash_table(struct list_head *table, int rows); +struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, + struct ip_vs_dest *dest, + struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + __be16 dport, + __be16 cport); #define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t))) #define IP_VS_APP_TYPE_FTP 1 @@ -1219,7 +1232,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp); const char *ip_vs_state_name(__u16 proto, int state); void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); -int ip_vs_check_template(struct ip_vs_conn *ct); +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); void ip_vs_random_dropentry(struct netns_ipvs *ipvs); int ip_vs_conn_init(void); void ip_vs_conn_cleanup(void); @@ -1378,6 +1391,10 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); +struct ip_vs_dest * +ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + int ip_vs_use_count_inc(void); void ip_vs_use_count_dec(void); int ip_vs_register_nl_ioctl(void); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0aeb97aec5d..8fed1cd78658 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -121,21 +121,21 @@ struct frag_hdr { extern int sysctl_mld_max_msf; extern int sysctl_mld_qrv; -#define _DEVINC(net, statname, modifier, idev, field) \ +#define _DEVINC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\ + mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\ }) /* per device counters are atomic_long_t */ -#define _DEVINCATOMIC(net, statname, modifier, idev, field) \ +#define _DEVINCATOMIC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ - SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ + mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\ }) /* per device and per net counters are atomic_long_t */ @@ -147,46 +147,44 @@ extern int sysctl_mld_qrv; SNMP_INC_STATS_ATOMIC_LONG((net)->mib.statname##_statistics, (field));\ }) -#define _DEVADD(net, statname, modifier, idev, field, val) \ +#define _DEVADD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ - SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\ + mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \ + mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\ }) -#define _DEVUPD(net, statname, modifier, idev, field, val) \ +#define _DEVUPD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ - SNMP_UPD_PO_STATS##modifier((_idev)->stats.statname, field, (val)); \ - SNMP_UPD_PO_STATS##modifier((net)->mib.statname##_statistics, field, (val));\ + mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \ + mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\ }) /* MIBs */ #define IP6_INC_STATS(net, idev,field) \ - _DEVINC(net, ipv6, 64, idev, field) -#define IP6_INC_STATS_BH(net, idev,field) \ - _DEVINC(net, ipv6, 64_BH, idev, field) + _DEVINC(net, ipv6, , idev, field) +#define __IP6_INC_STATS(net, idev,field) \ + _DEVINC(net, ipv6, __, idev, field) #define IP6_ADD_STATS(net, idev,field,val) \ - _DEVADD(net, ipv6, 64, idev, field, val) -#define IP6_ADD_STATS_BH(net, idev,field,val) \ - _DEVADD(net, ipv6, 64_BH, idev, field, val) + _DEVADD(net, ipv6, , idev, field, val) +#define __IP6_ADD_STATS(net, idev,field,val) \ + _DEVADD(net, ipv6, __, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ - _DEVUPD(net, ipv6, 64, idev, field, val) -#define IP6_UPD_PO_STATS_BH(net, idev,field,val) \ - _DEVUPD(net, ipv6, 64_BH, idev, field, val) + _DEVUPD(net, ipv6, , idev, field, val) +#define __IP6_UPD_PO_STATS(net, idev,field,val) \ + _DEVUPD(net, ipv6, __, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) -#define ICMP6_INC_STATS_BH(net, idev, field) \ - _DEVINCATOMIC(net, icmpv6, _BH, idev, field) +#define __ICMP6_INC_STATS(net, idev, field) \ + _DEVINCATOMIC(net, icmpv6, __, idev, field) #define ICMP6MSGOUT_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \ - _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256) -#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ +#define ICMP6MSGIN_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field) struct ip6_ra_chain { @@ -253,6 +251,13 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +struct ipcm6_cookie { + __s16 hlimit; + __s16 tclass; + __s8 dontfrag; + struct ipv6_txoptions *opt; +}; + static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) { struct ipv6_txoptions *opt; @@ -308,11 +313,19 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, int newtype, struct ipv6_opt_hdr __user *newopt, int newoptlen); +struct ipv6_txoptions * +ipv6_renew_options_kern(struct sock *sk, + struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr *newopt, + int newoptlen); struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); +struct ipv6_txoptions *ipv6_update_options(struct sock *sk, + struct ipv6_txoptions *opt); static inline bool ipv6_accept_ra(struct inet6_dev *idev) { @@ -865,9 +878,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, int hlimit, - int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag); + void *from, int length, int transhdrlen, + struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + const struct sockcm_cookie *sockc); int ip6_push_pending_frames(struct sock *sk); @@ -882,9 +896,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, - struct flowi6 *fl6, struct rt6_info *rt, - unsigned int flags, int dontfrag); + struct ipcm6_cookie *ipc6, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { @@ -937,7 +951,7 @@ enum { int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *fragflg); -int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); +int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type); struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, @@ -959,6 +973,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); +void ip6_datagram_release_cb(struct sock *sk); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index c43a9c73de5e..e90095091aa0 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,8 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include <net/fib_rules.h> + /** * struct l3mdev_ops - l3mdev operations * @@ -25,6 +27,8 @@ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); + struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, + struct sk_buff *skb, u16 proto); /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, @@ -34,11 +38,17 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, - const struct flowi6 *fl6); + struct flowi6 *fl6); + int (*l3mdev_get_saddr6)(struct net_device *dev, + const struct sock *sk, + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -69,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return rc; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +{ + /* netdev_master_upper_dev_get_rcu calls + * list_first_or_null_rcu to walk the upper dev list. + * list_first_or_null_rcu does not handle a const arg. We aren't + * making changes, just want the master device from that list so + * typecast to remove the const + */ + struct net_device *dev = (struct net_device *)_dev; + const struct net_device *master; + + if (!dev) + return NULL; + + if (netif_is_l3_master(dev)) + master = dev; + else if (netif_is_l3_slave(dev)) + master = netdev_master_upper_dev_get_rcu(dev); + else + master = NULL; + + return master; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -130,51 +165,38 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; +int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - if (ifindex) { +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6); - rcu_read_lock(); +static inline +struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) +{ + struct net_device *master = NULL; - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) { - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - } + if (netif_is_l3_slave(skb->dev)) + master = netdev_master_upper_dev_get_rcu(skb->dev); + else if (netif_is_l3_master(skb->dev)) + master = skb->dev; - rcu_read_unlock(); - } + if (master && master->l3mdev_ops->l3mdev_l3_rcv) + skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto); - return rc; + return skb; } -static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, - const struct flowi6 *fl6) +static inline +struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst) - return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); - - return NULL; + return l3mdev_l3_rcv(skb, AF_INET); } static inline -struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, - const struct flowi6 *fl6) +struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { - struct dst_entry *dst = NULL; - struct net_device *dev; - - dev = dev_get_by_index(net, fl6->flowi6_oif); - if (dev) { - dst = l3mdev_get_rt6_dst(dev, fl6); - dev_put(dev); - } - - return dst; + return l3mdev_l3_rcv(skb, AF_INET6); } #else @@ -193,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) return 0; } +static inline +const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +{ + return NULL; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; @@ -233,16 +261,34 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev, - const struct flowi6 *fl6) +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) { return NULL; } + +static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, + struct flowi6 *fl6) +{ + return 0; +} + static inline -struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net, - const struct flowi6 *fl6) +struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { - return NULL; + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) +{ + return skb; +} + +static inline +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg) +{ + return 1; } #endif diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0c09da34b67a..b4faadbb4e01 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/ieee80211.h> #include <net/cfg80211.h> +#include <net/codel.h> #include <asm/unaligned.h> /** @@ -291,7 +292,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_PS: PS changed for this BSS (STA mode) * @BSS_CHANGED_TXPOWER: TX power setting changed for this interface * @BSS_CHANGED_P2P_PS: P2P powersave settings (CTWindow, opportunistic PS) - * changed (currently only in P2P client mode, GO mode will be later) + * changed * @BSS_CHANGED_BEACON_INFO: Data from the AP's beacon became available: * currently dtim_period only is under consideration. * @BSS_CHANGED_BANDWIDTH: The bandwidth used by this interface changed, @@ -526,6 +527,9 @@ struct ieee80211_mu_group_data { * userspace), whereas TPC is disabled if %txpower_type is set to * NL80211_TX_POWER_FIXED (use value configured from userspace) * @p2p_noa_attr: P2P NoA attribute for P2P powersave + * @allow_p2p_go_ps: indication for AP or P2P GO interface, whether it's allowed + * to use P2P PS mechanism or not. AP/P2P GO is not allowed to use P2P PS + * if it has associated clients without P2P PS support. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -546,7 +550,7 @@ struct ieee80211_bss_conf { u8 sync_dtim_count; u32 basic_rates; struct ieee80211_rate *beacon_rate; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; @@ -563,6 +567,7 @@ struct ieee80211_bss_conf { int txpower; enum nl80211_tx_power_setting txpower_type; struct ieee80211_p2p_noa_attr p2p_noa_attr; + bool allow_p2p_go_ps; }; /** @@ -709,6 +714,7 @@ enum mac80211_tx_info_flags { * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame * * These flags are used in tx_info->control.flags. */ @@ -716,6 +722,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0), IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), + IEEE80211_TX_CTRL_AMSDU = BIT(3), }; /* @@ -889,7 +896,18 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; + union { + /* NB: vif can be NULL for injected frames */ + struct ieee80211_vif *vif; + + /* When packets are enqueued on txq it's easy + * to re-construct the vif pointer. There's no + * more space in tx_info so it can be used to + * store the necessary enqueue time for packet + * sojourn time computation. + */ + codel_time_t enqueue_time; + }; struct ieee80211_key_conf *hw_key; u32 flags; /* 4 bytes free */ @@ -932,8 +950,8 @@ struct ieee80211_tx_info { * @common_ie_len: length of the common_ies */ struct ieee80211_scan_ies { - const u8 *ies[IEEE80211_NUM_BANDS]; - size_t len[IEEE80211_NUM_BANDS]; + const u8 *ies[NUM_NL80211_BANDS]; + size_t len[NUM_NL80211_BANDS]; const u8 *common_ies; size_t common_ie_len; }; @@ -1001,6 +1019,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * flag indicates that the PN was verified for replay protection. * Note that this flag is also currently only supported when a frame * is also decrypted (ie. @RX_FLAG_DECRYPTED must be set) + * @RX_FLAG_DUP_VALIDATED: The driver should set this flag if it did + * de-duplication by itself. * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on @@ -1034,6 +1054,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * on this subframe * @RX_FLAG_AMPDU_DELIM_CRC_KNOWN: The delimiter CRC field is known (the CRC * is stored in the @ampdu_delimiter_crc field) + * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was + * done by the hardware * @RX_FLAG_LDPC: LDPC was used * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without * processing it in any regular way. @@ -1058,6 +1080,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_RADIOTAP_VENDOR_DATA: This frame contains vendor-specific * radiotap data in the skb->data (before the frame) as described by * the &struct ieee80211_vendor_radiotap. + * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. + * This is used for AMSDU subframes which can have the same PN as + * the first subframe. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1091,6 +1116,8 @@ enum mac80211_rx_flags { RX_FLAG_5MHZ = BIT(29), RX_FLAG_AMSDU_MORE = BIT(30), RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), + RX_FLAG_MIC_STRIPPED = BIT_ULL(32), + RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), }; #define RX_FLAG_STBC_SHIFT 26 @@ -1120,6 +1147,8 @@ enum mac80211_rx_vht_flags { * * @mactime: value in microseconds of the 64-bit Time Synchronization Function * (TSF) timer when the first data symbol (MPDU) arrived at the hardware. + * @boottime_ns: CLOCK_BOOTTIME timestamp the frame was received at, this is + * needed only for beacons and probe responses that update the scan cache. * @device_timestamp: arbitrary timestamp for the device, mac80211 doesn't use * it but can store it and pass it back to the driver for synchronisation * @band: the active band when this frame was received @@ -1146,9 +1175,10 @@ enum mac80211_rx_vht_flags { */ struct ieee80211_rx_status { u64 mactime; + u64 boottime_ns; u32 device_timestamp; u32 ampdu_reference; - u32 flag; + u64 flag; u16 freq; u8 vht_flag; u8 rate_idx; @@ -1735,10 +1765,12 @@ struct ieee80211_sta_rates { * size is min(max_amsdu_len, 7935) bytes. * Both additional HT limits must be enforced by the low level driver. * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). + * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { - u32 supp_rates[IEEE80211_NUM_BANDS]; + u32 supp_rates[NUM_NL80211_BANDS]; u8 addr[ETH_ALEN]; u16 aid; struct ieee80211_sta_ht_cap ht_cap; @@ -1755,6 +1787,8 @@ struct ieee80211_sta { bool mfp; u8 max_amsdu_subframes; u16 max_amsdu_len; + bool support_p2p_ps; + u16 max_rc_amsdu_len; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; @@ -1968,6 +2002,18 @@ struct ieee80211_txq { * order and does not need to manage its own reorder buffer or BA session * timeout. * + * @IEEE80211_HW_USES_RSS: The device uses RSS and thus requires parallel RX, + * which implies using per-CPU station statistics. + * + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated + * A-MSDU frames. Requires software tx queueing and fast-xmit support. + * When not using minstrel/minstrel_ht rate control, the driver must + * limit the maximum A-MSDU size based on the current tx rate by setting + * max_rc_amsdu_len in struct ieee80211_sta. + * + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list + * skbs, needed for zero-copy software A-MSDU. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2005,6 +2051,9 @@ enum ieee80211_hw_flags { IEEE80211_HW_BEACON_TX_STATUS, IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, IEEE80211_HW_SUPPORTS_REORDERING_BUFFER, + IEEE80211_HW_USES_RSS, + IEEE80211_HW_TX_AMSDU, + IEEE80211_HW_TX_FRAG_LIST, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2077,6 +2126,9 @@ enum ieee80211_hw_flags { * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 * build 002 Jun 18 2012). * + * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum + * of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list. + * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) * @@ -2107,9 +2159,6 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. - * - * @txq_ac_max_pending: maximum number of frames per AC pending in all txq - * entries for a vif. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2131,6 +2180,7 @@ struct ieee80211_hw { u8 max_rate_tries; u8 max_rx_aggregation_subframes; u8 max_tx_aggregation_subframes; + u8 max_tx_fragments; u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; @@ -2139,7 +2189,6 @@ struct ieee80211_hw { u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; - int txq_ac_max_pending; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3348,6 +3397,10 @@ enum ieee80211_reconfig_type { * the function call. * * @wake_tx_queue: Called when new packets have been added to the queue. + * @sync_rx_queues: Process all pending frames in RSS queues. This is a + * synchronization which is needed in case driver has in its RSS queues + * pending frames that were received prior to the control path action + * currently taken (e.g. disassociation) but are not processed yet. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3585,6 +3638,7 @@ struct ieee80211_ops { void (*wake_tx_queue)(struct ieee80211_hw *hw, struct ieee80211_txq *txq); + void (*sync_rx_queues)(struct ieee80211_hw *hw); }; /** @@ -3838,11 +3892,12 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * This function must be called with BHs disabled. * * @hw: the hardware this frame came in on + * @sta: the station the frame was received from, or %NULL * @skb: the buffer to receive, owned by mac80211 after this call * @napi: the NAPI context */ -void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, - struct napi_struct *napi); +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta, + struct sk_buff *skb, struct napi_struct *napi); /** * ieee80211_rx - receive frame @@ -3866,7 +3921,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, */ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) { - ieee80211_rx_napi(hw, skb, NULL); + ieee80211_rx_napi(hw, NULL, skb, NULL); } /** @@ -3949,6 +4004,33 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta, return ret; } +/** + * ieee80211_sta_pspoll - PS-Poll frame received + * @sta: currently connected station + * + * When operating in AP mode with the %IEEE80211_HW_AP_LINK_PS flag set, + * use this function to inform mac80211 that a PS-Poll frame from a + * connected station was received. + * This must be used in conjunction with ieee80211_sta_ps_transition() + * and possibly ieee80211_sta_uapsd_trigger(); calls to all three must + * be serialized. + */ +void ieee80211_sta_pspoll(struct ieee80211_sta *sta); + +/** + * ieee80211_sta_uapsd_trigger - (potential) U-APSD trigger frame received + * @sta: currently connected station + * @tid: TID of the received (potential) trigger frame + * + * When operating in AP mode with the %IEEE80211_HW_AP_LINK_PS flag set, + * use this function to inform mac80211 that a (potential) trigger frame + * from a connected station was received. + * This must be used in conjunction with ieee80211_sta_ps_transition() + * and possibly ieee80211_sta_pspoll(); calls to all three must be + * serialized. + */ +void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); + /* * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. @@ -4361,7 +4443,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, */ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum ieee80211_band band, + enum nl80211_band band, size_t frame_len, struct ieee80211_rate *rate); @@ -4615,9 +4697,10 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * any context, including hardirq context. * * @hw: the hardware that finished the scan - * @aborted: set to true if scan was aborted + * @info: information about the completed scan */ -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); +void ieee80211_scan_completed(struct ieee80211_hw *hw, + struct cfg80211_scan_info *info); /** * ieee80211_sched_scan_results - got results from scheduled scan @@ -5314,7 +5397,7 @@ struct rate_control_ops { }; static inline int rate_supported(struct ieee80211_sta *sta, - enum ieee80211_band band, + enum nl80211_band band, int index) { return (sta == NULL || sta->supp_rates[band] & BIT(index)); diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 6cd7a70706a9..286824acd008 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -247,14 +247,123 @@ struct ieee802154_ops { */ static inline __le16 ieee802154_get_fc_from_skb(const struct sk_buff *skb) { + __le16 fc; + /* check if we can fc at skb_mac_header of sk buffer */ - if (unlikely(!skb_mac_header_was_set(skb) || - (skb_tail_pointer(skb) - skb_mac_header(skb)) < 2)) { - WARN_ON(1); + if (WARN_ON(!skb_mac_header_was_set(skb) || + (skb_tail_pointer(skb) - + skb_mac_header(skb)) < IEEE802154_FC_LEN)) return cpu_to_le16(0); + + memcpy(&fc, skb_mac_header(skb), IEEE802154_FC_LEN); + return fc; +} + +/** + * ieee802154_skb_dst_pan - get the pointer to destination pan field + * @fc: mac header frame control field + * @skb: skb where the destination pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_dst_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan; + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + dst_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + dst_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + dst_pan = NULL; + break; } - return get_unaligned_le16(skb_mac_header(skb)); + return dst_pan; +} + +/** + * ieee802154_skb_src_pan - get the pointer to source pan field + * @fc: mac header frame control field + * @skb: skb where the source pan pointer will be get from + */ +static inline unsigned char *ieee802154_skb_src_pan(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *src_pan; + + switch (ieee802154_saddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = NULL; + break; + case cpu_to_le16(IEEE802154_FCTL_SADDR_SHORT): + case cpu_to_le16(IEEE802154_FCTL_SADDR_EXTENDED): + /* if intra-pan and source addr mode is non none, + * then source pan id is equal destination pan id. + */ + if (ieee802154_is_intra_pan(fc)) { + src_pan = ieee802154_skb_dst_pan(fc, skb); + break; + } + + switch (ieee802154_daddr_mode(fc)) { + case cpu_to_le16(IEEE802154_FCTL_ADDR_NONE): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_SHORT): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_SHORT_ADDR_LEN; + break; + case cpu_to_le16(IEEE802154_FCTL_DADDR_EXTENDED): + src_pan = skb_mac_header(skb) + + IEEE802154_FC_LEN + + IEEE802154_SEQ_LEN + + IEEE802154_PAN_ID_LEN + + IEEE802154_EXTENDED_ADDR_LEN; + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + break; + default: + WARN_ONCE(1, "invalid addr mode detected"); + src_pan = NULL; + break; + } + + return src_pan; +} + +/** + * ieee802154_skb_is_intra_pan_addressing - checks whenever the mac addressing + * is an intra pan communication + * @fc: mac header frame control field + * @skb: skb where the source and destination pan should be get from + */ +static inline bool ieee802154_skb_is_intra_pan_addressing(__le16 fc, + const struct sk_buff *skb) +{ + unsigned char *dst_pan = ieee802154_skb_dst_pan(fc, skb), + *src_pan = ieee802154_skb_src_pan(fc, skb); + + /* if one is NULL is no intra pan addressing */ + if (!dst_pan || !src_pan) + return false; + + return !memcmp(dst_pan, src_pan, IEEE802154_PAN_ID_LEN); } /** @@ -288,6 +397,16 @@ static inline void ieee802154_le16_to_be16(void *be16_dst, const void *le16_src) } /** + * ieee802154_be16_to_le16 - copies and convert be16 to le16 + * @le16_dst: le16 destination pointer + * @be16_src: be16 source pointer + */ +static inline void ieee802154_be16_to_le16(void *le16_dst, const void *be16_src) +{ + put_unaligned_le16(get_unaligned_be16(be16_src), le16_dst); +} + +/** * ieee802154_alloc_hw - Allocate a new hardware device * * This must be called once for each hardware device. The returned pointer diff --git a/include/net/ncsi.h b/include/net/ncsi.h new file mode 100644 index 000000000000..1dbf42f79750 --- /dev/null +++ b/include/net/ncsi.h @@ -0,0 +1,52 @@ +#ifndef __NET_NCSI_H +#define __NET_NCSI_H + +/* + * The NCSI device states seen from external. More NCSI device states are + * only visible internally (in net/ncsi/internal.h). When the NCSI device + * is registered, it's in ncsi_dev_state_registered state. The state + * ncsi_dev_state_start is used to drive to choose active package and + * channel. After that, its state is changed to ncsi_dev_state_functional. + * + * The state ncsi_dev_state_stop helps to shut down the currently active + * package and channel while ncsi_dev_state_config helps to reconfigure + * them. + */ +enum { + ncsi_dev_state_registered = 0x0000, + ncsi_dev_state_functional = 0x0100, + ncsi_dev_state_probe = 0x0200, + ncsi_dev_state_config = 0x0300, + ncsi_dev_state_suspend = 0x0400, +}; + +struct ncsi_dev { + int state; + int link_up; + struct net_device *dev; + void (*handler)(struct ncsi_dev *ndev); +}; + +#ifdef CONFIG_NET_NCSI +struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)); +int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_unregister_dev(struct ncsi_dev *nd); +#else /* !CONFIG_NET_NCSI */ +static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)) +{ + return NULL; +} + +static inline int ncsi_start_dev(struct ncsi_dev *nd) +{ + return -ENOTTY; +} + +static inline void ncsi_unregister_dev(struct ncsi_dev *nd) +{ +} +#endif /* CONFIG_NET_NCSI */ + +#endif /* __NET_NCSI_H */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 2d8edaad29cb..be1fe2283254 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,6 +35,7 @@ enum { ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ + ND_OPT_6CO = 34, /* RFC6775 */ __ND_OPT_MAX }; @@ -53,11 +54,21 @@ enum { #include <net/neighbour.h> +/* Set to 3 to get tracing... */ +#define ND_DEBUG 1 + +#define ND_PRINTK(val, level, fmt, ...) \ +do { \ + if (val <= ND_DEBUG) \ + net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ +} while (0) + struct ctl_table; struct inet6_dev; struct net_device; struct net_proto_family; struct sk_buff; +struct prefix_info; extern struct neigh_table nd_tbl; @@ -99,20 +110,201 @@ struct ndisc_options { #endif struct nd_opt_hdr *nd_useropts; struct nd_opt_hdr *nd_useropts_end; +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) + struct nd_opt_hdr *nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR + 1]; +#endif }; -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] +#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] +#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] +#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] +#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts); +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, + int data_len, int pad); + +#define NDISC_OPS_REDIRECT_DATA_SPACE 2 + +/* + * This structure defines the hooks for IPv6 neighbour discovery. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * int (*is_useropt)(u8 nd_opt_type): + * This function is called when IPv6 decide RA userspace options. if + * this function returns 1 then the option given by nd_opt_type will + * be handled as userspace option additional to the IPv6 options. + * + * int (*parse_options)(const struct net_device *dev, + * struct nd_opt_hdr *nd_opt, + * struct ndisc_options *ndopts): + * This function is called while parsing ndisc ops and put each position + * as pointer into ndopts. If this function return unequal 0, then this + * function took care about the ndisc option, if 0 then the IPv6 ndisc + * option parser will take care about that option. + * + * void (*update)(const struct net_device *dev, struct neighbour *n, + * u32 flags, u8 icmp6_type, + * const struct ndisc_options *ndopts): + * This function is called when IPv6 ndisc updates the neighbour cache + * entry. Additional options which can be updated may be previously + * parsed by parse_opts callback and accessible over ndopts parameter. + * + * int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + * struct neighbour *neigh, u8 *ha_buf, + * u8 **ha): + * This function is called when the necessary option space will be + * calculated before allocating a skb. The parameters neigh, ha_buf + * abd ha are available on NDISC_REDIRECT messages only. + * + * void (*fill_addr_option)(const struct net_device *dev, + * struct sk_buff *skb, u8 icmp6_type, + * const u8 *ha): + * This function is called when the skb will finally fill the option + * fields inside skb. NOTE: this callback should fill the option + * fields to the skb which are previously indicated by opt_space + * parameter. That means the decision to add such option should + * not lost between these two callbacks, e.g. protected by interface + * up state. + * + * void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + * const struct prefix_info *pinfo, + * struct inet6_dev *in6_dev, + * struct in6_addr *addr, + * int addr_type, u32 addr_flags, + * bool sllao, bool tokenized, + * __u32 valid_lft, u32 prefered_lft, + * bool dev_addr_generated): + * This function is called when a RA messages is received with valid + * PIO option fields and an IPv6 address will be added to the interface + * for autoconfiguration. The parameter dev_addr_generated reports about + * if the address was based on dev->dev_addr or not. This can be used + * to add a second address if link-layer operates with two link layer + * addresses. E.g. 802.15.4 6LoWPAN. + */ +struct ndisc_ops { + int (*is_useropt)(u8 nd_opt_type); + int (*parse_options)(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts); + void (*update)(const struct net_device *dev, struct neighbour *n, + u32 flags, u8 icmp6_type, + const struct ndisc_options *ndopts); + int (*opt_addr_space)(const struct net_device *dev, u8 icmp6_type, + struct neighbour *neigh, u8 *ha_buf, + u8 **ha); + void (*fill_addr_option)(const struct net_device *dev, + struct sk_buff *skb, u8 icmp6_type, + const u8 *ha); + void (*prefix_rcv_add_addr)(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft, + bool dev_addr_generated); +}; + +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_ops_is_useropt(const struct net_device *dev, + u8 nd_opt_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->is_useropt) + return dev->ndisc_ops->is_useropt(nd_opt_type); + else + return 0; +} + +static inline int ndisc_ops_parse_options(const struct net_device *dev, + struct nd_opt_hdr *nd_opt, + struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->parse_options) + return dev->ndisc_ops->parse_options(dev, nd_opt, ndopts); + else + return 0; +} + +static inline void ndisc_ops_update(const struct net_device *dev, + struct neighbour *n, u32 flags, + u8 icmp6_type, + const struct ndisc_options *ndopts) +{ + if (dev->ndisc_ops && dev->ndisc_ops->update) + dev->ndisc_ops->update(dev, n, flags, icmp6_type, ndopts); +} + +static inline int ndisc_ops_opt_addr_space(const struct net_device *dev, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space && + icmp6_type != NDISC_REDIRECT) + return dev->ndisc_ops->opt_addr_space(dev, icmp6_type, NULL, + NULL, NULL); + else + return 0; +} + +static inline int ndisc_ops_redirect_opt_addr_space(const struct net_device *dev, + struct neighbour *neigh, + u8 *ha_buf, u8 **ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->opt_addr_space) + return dev->ndisc_ops->opt_addr_space(dev, NDISC_REDIRECT, + neigh, ha_buf, ha); + else + return 0; +} + +static inline void ndisc_ops_fill_addr_option(const struct net_device *dev, + struct sk_buff *skb, + u8 icmp6_type) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option && + icmp6_type != NDISC_REDIRECT) + dev->ndisc_ops->fill_addr_option(dev, skb, icmp6_type, NULL); +} + +static inline void ndisc_ops_fill_redirect_addr_option(const struct net_device *dev, + struct sk_buff *skb, + const u8 *ha) +{ + if (dev->ndisc_ops && dev->ndisc_ops->fill_addr_option) + dev->ndisc_ops->fill_addr_option(dev, skb, NDISC_REDIRECT, ha); +} + +static inline void ndisc_ops_prefix_rcv_add_addr(struct net *net, + struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + struct in6_addr *addr, + int addr_type, u32 addr_flags, + bool sllao, bool tokenized, + __u32 valid_lft, + u32 prefered_lft, + bool dev_addr_generated) +{ + if (dev->ndisc_ops && dev->ndisc_ops->prefix_rcv_add_addr) + dev->ndisc_ops->prefix_rcv_add_addr(net, dev, pinfo, in6_dev, + addr, addr_type, + addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); +} +#endif + /* * Return the padding between the option length and the start of the * link addr. Currently only IP-over-InfiniBand needs this, although @@ -127,23 +319,48 @@ static inline int ndisc_addr_option_pad(unsigned short type) } } -static inline int ndisc_opt_addr_space(struct net_device *dev) +static inline int __ndisc_opt_addr_space(unsigned char addr_len, int pad) { - return NDISC_OPT_SPACE(dev->addr_len + - ndisc_addr_option_pad(dev->type)); + return NDISC_OPT_SPACE(addr_len + pad); } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) +#if IS_ENABLED(CONFIG_IPV6) +static inline int ndisc_opt_addr_space(struct net_device *dev, u8 icmp6_type) +{ + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_opt_addr_space(dev, icmp6_type); +} + +static inline int ndisc_redirect_opt_addr_space(struct net_device *dev, + struct neighbour *neigh, + u8 *ops_data_buf, + u8 **ops_data) +{ + return __ndisc_opt_addr_space(dev->addr_len, + ndisc_addr_option_pad(dev->type)) + + ndisc_ops_redirect_opt_addr_space(dev, neigh, ops_data_buf, + ops_data); +} +#endif + +static inline u8 *__ndisc_opt_addr_data(struct nd_opt_hdr *p, + unsigned char addr_len, int prepad) { u8 *lladdr = (u8 *)(p + 1); int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != ndisc_opt_addr_space(dev)) + if (lladdrlen != __ndisc_opt_addr_space(addr_len, prepad)) return NULL; return lladdr + prepad; } +static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, + struct net_device *dev) +{ + return __ndisc_opt_addr_data(p, dev->addr_len, + ndisc_addr_option_pad(dev->type)); +} + static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; @@ -194,6 +411,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target); int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts); /* * IGMP diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4089abc6e9c0..0933c7455a30 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -275,7 +275,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst #else #define __net_init __init -#define __net_exit __exit_refok +#define __net_exit __ref #define __net_initdata __initdata #define __net_initconst __initconst #endif diff --git a/include/net/netevent.h b/include/net/netevent.h index d8bbb38584b6..f440df172b56 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -24,6 +24,7 @@ struct netevent_redirect { enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ + NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index fde4068eec0b..445b019c2078 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,6 +17,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/atomic.h> +#include <linux/rhashtable.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_dccp.h> @@ -85,6 +86,9 @@ struct nf_conn { spinlock_t lock; u16 cpu; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; @@ -114,6 +118,9 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; +#if IS_ENABLED(CONFIG_NF_NAT) + struct rhash_head nat_bysource; +#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; @@ -263,12 +270,12 @@ static inline int nf_ct_is_template(const struct nf_conn *ct) } /* It's confirmed if it is, or has been in the hash table. */ -static inline int nf_ct_is_confirmed(struct nf_conn *ct) +static inline int nf_ct_is_confirmed(const struct nf_conn *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } -static inline int nf_ct_is_dying(struct nf_conn *ct) +static inline int nf_ct_is_dying(const struct nf_conn *ct) { return test_bit(IPS_DYING_BIT, &ct->status); } @@ -284,13 +291,20 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +/* jiffies until ct expires, 0 if already expired */ +static inline unsigned long nf_ct_expires(const struct nf_conn *ct) +{ + long timeout = (long)ct->timeout.expires - (long)jiffies; + + return timeout > 0 ? timeout : 0; +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; -extern unsigned int nf_conntrack_hash_rnd; -void init_nf_conntrack_hash_rnd(void); struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, @@ -299,6 +313,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..79d7ac5c9740 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); + /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, @@ -81,6 +83,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 +extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 57c880378443..fa36447371c6 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -73,6 +73,8 @@ void nf_conntrack_unregister_notifier(struct net *net, struct nf_ct_event_notifier *nb); void nf_ct_deliver_cached_events(struct nf_conn *ct); +int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, + u32 portid, int report); static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) @@ -91,69 +93,25 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) } static inline int -nf_conntrack_eventmask_report(unsigned int eventmask, - struct nf_conn *ct, - u32 portid, - int report) -{ - int ret = 0; - struct net *net = nf_ct_net(ct); - struct nf_ct_event_notifier *notify; - struct nf_conntrack_ecache *e; - - rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); - if (notify == NULL) - goto out_unlock; - - e = nf_ct_ecache_find(ct); - if (e == NULL) - goto out_unlock; - - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { - struct nf_ct_event item = { - .ct = ct, - .portid = e->portid ? e->portid : portid, - .report = report - }; - /* This is a resent of a destroy event? If so, skip missed */ - unsigned long missed = e->portid ? 0 : e->missed; - - if (!((eventmask | missed) & e->ctmask)) - goto out_unlock; - - ret = notify->fcn(eventmask | missed, &item); - if (unlikely(ret < 0 || missed)) { - spin_lock_bh(&ct->lock); - if (ret < 0) { - /* This is a destroy event that has been - * triggered by a process, we store the PORTID - * to include it in the retransmission. */ - if (eventmask & (1 << IPCT_DESTROY) && - e->portid == 0 && portid != 0) - e->portid = portid; - else - e->missed |= eventmask; - } else - e->missed &= ~missed; - spin_unlock_bh(&ct->lock); - } - } -out_unlock: - rcu_read_unlock(); - return ret; -} - -static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { + const struct net *net = nf_ct_net(ct); + + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return 0; + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { + const struct net *net = nf_ct_net(ct); + + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) + return 0; + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); } @@ -172,43 +130,9 @@ int nf_ct_expect_register_notifier(struct net *net, void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_event_notifier *nb); -static inline void -nf_ct_expect_event_report(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp, - u32 portid, - int report) -{ - struct net *net = nf_ct_exp_net(exp); - struct nf_exp_event_notifier *notify; - struct nf_conntrack_ecache *e; - - rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_expect_event_cb); - if (notify == NULL) - goto out_unlock; - - e = nf_ct_ecache_find(exp->master); - if (e == NULL) - goto out_unlock; - - if (e->expmask & (1 << event)) { - struct nf_exp_event item = { - .exp = exp, - .portid = portid, - .report = report - }; - notify->fcn(1 << event, &item); - } -out_unlock: - rcu_read_unlock(); -} - -static inline void -nf_ct_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) -{ - nf_ct_expect_event_report(event, exp, 0, 0); -} +void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 portid, int report); int nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); @@ -245,8 +169,6 @@ static inline int nf_conntrack_event_report(enum ip_conntrack_events event, u32 portid, int report) { return 0; } static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} -static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, - struct nf_conntrack_expect *exp) {} static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, u32 portid, diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index dce56f09ac9a..5ed33ea4718e 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -10,6 +10,7 @@ extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; +extern struct hlist_head *nf_ct_expect_hash; struct nf_conntrack_expect { /* Conntrack expectation list member */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 55d15049ab2f..1c3035dda31f 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -15,9 +15,6 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, #endif -#ifdef CONFIG_NF_CONNTRACK_ZONES - NF_CT_EXT_ZONE, -#endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_CT_EXT_TSTAMP, #endif @@ -38,7 +35,6 @@ enum nf_ct_ext_id { #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels @@ -103,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); - /* Called when realloacted (can be NULL). - Contents has already been moved. */ - void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 6cf614bc0029..1eaac1f4cd6a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); int nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 956d8a6ac069..1a5fb36f165f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -23,6 +23,9 @@ struct nf_conntrack_l4proto { /* L4 Protocol number. */ u_int8_t l4proto; + /* Resolve clashes on insertion races. */ + bool allow_clash; + /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 7e2b1d025f50..498814626e28 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -10,8 +10,7 @@ #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) struct nf_conn_labels { - u8 words; - unsigned long bits[]; + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) @@ -26,39 +25,29 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - struct nf_conn_labels *cl_ext; struct net *net = nf_ct_net(ct); - u8 words; - words = ACCESS_ONCE(net->ct.label_words); - if (words == 0) + if (net->ct.labels_used == 0) return NULL; - cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - words * sizeof(long), GFP_ATOMIC); - if (cl_ext != NULL) - cl_ext->words = words; - - return cl_ext; + return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + sizeof(struct nf_conn_labels), GFP_ATOMIC); #else return NULL; #endif } -bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); -int nf_connlabel_set(struct nf_conn *ct, u16 bit); - int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words); #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(void); void nf_conntrack_labels_fini(void); -int nf_connlabels_get(struct net *net, unsigned int n_bits); +int nf_connlabels_get(struct net *net, unsigned int bit); void nf_connlabels_put(struct net *net); #else static inline int nf_conntrack_labels_init(void) { return 0; } static inline void nf_conntrack_labels_fini(void) {} -static inline int nf_connlabels_get(struct net *net, unsigned int n_bits) { return 0; } +static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; } static inline void nf_connlabels_put(struct net *net) {} #endif diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 4e32512cef32..64a718b60839 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -9,12 +9,11 @@ static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { - const struct nf_conntrack_zone *nf_ct_zone = NULL; - #ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + return &ct->zone; +#else + return &nf_ct_zone_dflt; #endif - return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt; } static inline const struct nf_conntrack_zone * @@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { - const struct nf_conntrack_zone *zone; - +#ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; - zone = nf_ct_zone(tmpl); - if (zone->flags & NF_CT_FLAG_MARK) - zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); - - return zone; + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); } -static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, - const struct nf_conntrack_zone *info) +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; - - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - return -ENOMEM; - - nf_ct_zone_init(nf_ct_zone, info->id, info->dir, - info->flags); + ct->zone = *zone; #endif - return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, @@ -68,22 +57,34 @@ static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_matches_dir(zone, dir) ? zone->id : NF_CT_DEFAULT_ZONE_ID; +#else + return NF_CT_DEFAULT_ZONE_ID; +#endif } static inline bool nf_ct_zone_equal(const struct nf_conn *a, const struct nf_conntrack_zone *b, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_id(nf_ct_zone(a), dir) == nf_ct_zone_id(b, dir); +#else + return true; +#endif } static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, const struct nf_conntrack_zone *b) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone(a)->id == b->id; +#else + return true; +#endif } #endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 57639fca223a..83d855ba6af1 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -12,6 +12,9 @@ #define NF_LOG_UID 0x08 /* Log UID owning local socket */ #define NF_LOG_MASK 0x0f +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + enum nf_log_type { NF_LOG_TYPE_LOG = 0, NF_LOG_TYPE_ULOG, @@ -22,9 +25,13 @@ struct nf_loginfo { u_int8_t type; union { struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ u_int32_t copy_len; u_int16_t group; u_int16_t qthreshold; + u_int16_t flags; } ulog; struct { u_int8_t level; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 344b1ab19220..c327a431a6f3 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,5 +1,6 @@ #ifndef _NF_NAT_H #define _NF_NAT_H +#include <linux/rhashtable.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter/nf_nat.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -29,8 +30,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct hlist_node bysource; - struct nf_conn *ct; union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9c5638ad872e..0dbce55437f2 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -28,8 +28,8 @@ struct nf_queue_handler { struct nf_hook_ops *ops); }; -void nf_register_queue_handler(const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(void); +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f6b1daf2e698..f2f13399ce44 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -167,6 +167,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { + u8 genmask; unsigned int count; unsigned int skip; int err; @@ -235,7 +236,8 @@ struct nft_expr; * @features: features supported by the implementation */ struct nft_set_ops { - bool (*lookup)(const struct nft_set *set, + bool (*lookup)(const struct net *net, + const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, @@ -247,11 +249,14 @@ struct nft_set_ops { struct nft_regs *regs, const struct nft_set_ext **ext); - int (*insert)(const struct nft_set *set, + int (*insert)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void (*activate)(const struct nft_set *set, + void (*activate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct nft_set *set, + void * (*deactivate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); @@ -294,8 +299,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @udlen: user data length * @udata: user data * @ops: set ops - * @pnet: network namespace * @flags: set flags + * @genmask: generation mask * @klen: key length * @dlen: data length * @data: private set data @@ -303,7 +308,7 @@ void nft_unregister_set(struct nft_set_ops *ops); struct nft_set { struct list_head list; struct list_head bindings; - char name[IFNAMSIZ]; + char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; u32 size; @@ -316,8 +321,8 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - possible_net_t pnet; - u16 flags; + u16 flags:14, + genmask:2; u8 klen; u8 dlen; unsigned char data[] @@ -335,9 +340,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv) } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -732,7 +737,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_INACTIVE = 0x2, }; /** @@ -754,7 +758,8 @@ struct nft_chain { u64 handle; u32 use; u16 level; - u8 flags; + u8 flags:6, + genmask:2; char name[NFT_CHAIN_MAXNAMELEN]; }; @@ -796,13 +801,11 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 -#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops - * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -811,7 +814,6 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - possible_net_t pnet; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -838,6 +840,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask * @name: name of the table */ struct nft_table { @@ -846,7 +849,8 @@ struct nft_table { struct list_head sets; u64 hgenerator; u32 use; - u16 flags; + u16 flags:14, + genmask:2; char name[NFT_TABLE_MAXNAMELEN]; }; @@ -970,6 +974,32 @@ static inline u8 nft_genmask_cur(const struct net *net) #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) /* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. */ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) + +/* * Set element transaction helpers */ @@ -979,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, return !(ext->genmask & genmask); } -static inline void nft_set_elem_change_active(const struct nft_set *set, +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, struct nft_set_ext *ext) { - ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); + ext->genmask ^= nft_genmask_next(net); } /* diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 7b5a300de7f5..efe98068880f 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -40,6 +40,7 @@ #include <linux/atomic.h> struct cipso_v4_doi; +struct calipso_doi; /* * NetLabel - A management interface for maintaining network packet label @@ -94,6 +95,8 @@ struct cipso_v4_doi; #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" #define NETLBL_NLTYPE_ADDRSELECT 6 #define NETLBL_NLTYPE_ADDRSELECT_NAME "NLBL_ADRSEL" +#define NETLBL_NLTYPE_CALIPSO 7 +#define NETLBL_NLTYPE_CALIPSO_NAME "NLBL_CALIPSO" /* * NetLabel - Kernel API for accessing the network packet label mappings. @@ -216,6 +219,63 @@ struct netlbl_lsm_secattr { } attr; }; +/** + * struct netlbl_calipso_ops - NetLabel CALIPSO operations + * @doi_add: add a CALIPSO DOI + * @doi_free: free a CALIPSO DOI + * @doi_getdef: returns a reference to a DOI + * @doi_putdef: releases a reference of a DOI + * @doi_walk: enumerate the DOI list + * @sock_getattr: retrieve the socket's attr + * @sock_setattr: set the socket's attr + * @sock_delattr: remove the socket's attr + * @req_setattr: set the req socket's attr + * @req_delattr: remove the req socket's attr + * @opt_getattr: retrieve attr from memory block + * @skbuff_optptr: find option in packet + * @skbuff_setattr: set the skbuff's attr + * @skbuff_delattr: remove the skbuff's attr + * @cache_invalidate: invalidate cache + * @cache_add: add cache entry + * + * Description: + * This structure is filled out by the CALIPSO engine and passed + * to the NetLabel core via a call to netlbl_calipso_ops_register(). + * It enables the CALIPSO engine (and hence IPv6) to be compiled + * as a module. + */ +struct netlbl_calipso_ops { + int (*doi_add)(struct calipso_doi *doi_def, + struct netlbl_audit *audit_info); + void (*doi_free)(struct calipso_doi *doi_def); + int (*doi_remove)(u32 doi, struct netlbl_audit *audit_info); + struct calipso_doi *(*doi_getdef)(u32 doi); + void (*doi_putdef)(struct calipso_doi *doi_def); + int (*doi_walk)(u32 *skip_cnt, + int (*callback)(struct calipso_doi *doi_def, void *arg), + void *cb_arg); + int (*sock_getattr)(struct sock *sk, + struct netlbl_lsm_secattr *secattr); + int (*sock_setattr)(struct sock *sk, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); + void (*sock_delattr)(struct sock *sk); + int (*req_setattr)(struct request_sock *req, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); + void (*req_delattr)(struct request_sock *req); + int (*opt_getattr)(const unsigned char *calipso, + struct netlbl_lsm_secattr *secattr); + unsigned char *(*skbuff_optptr)(const struct sk_buff *skb); + int (*skbuff_setattr)(struct sk_buff *skb, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); + int (*skbuff_delattr)(struct sk_buff *skb); + void (*cache_invalidate)(void); + int (*cache_add)(const unsigned char *calipso_ptr, + const struct netlbl_lsm_secattr *secattr); +}; + /* * LSM security attribute operations (inline) */ @@ -385,6 +445,14 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info); +int netlbl_cfg_calipso_add(struct calipso_doi *doi_def, + struct netlbl_audit *audit_info); +void netlbl_cfg_calipso_del(u32 doi, struct netlbl_audit *audit_info); +int netlbl_cfg_calipso_map_add(u32 doi, + const char *domain, + const struct in6_addr *addr, + const struct in6_addr *mask, + struct netlbl_audit *audit_info); /* * LSM security attribute operations */ @@ -405,6 +473,12 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, unsigned long bitmap, gfp_t flags); +/* Bitmap functions + */ +int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, + u32 offset, u8 state); +void netlbl_bitmap_setbit(unsigned char *bitmap, u32 bit, u8 state); + /* * LSM protocol operations (NetLabel LSM/kernel API) */ @@ -427,13 +501,13 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); -void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway); +void netlbl_skbuff_err(struct sk_buff *skb, u16 family, int error, int gateway); /* * LSM label mapping cache operations */ void netlbl_cache_invalidate(void); -int netlbl_cache_add(const struct sk_buff *skb, +int netlbl_cache_add(const struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); /* @@ -495,6 +569,24 @@ static inline int netlbl_cfg_cipsov4_map_add(u32 doi, { return -ENOSYS; } +static inline int netlbl_cfg_calipso_add(struct calipso_doi *doi_def, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} +static inline void netlbl_cfg_calipso_del(u32 doi, + struct netlbl_audit *audit_info) +{ + return; +} +static inline int netlbl_cfg_calipso_map_add(u32 doi, + const char *domain, + const struct in6_addr *addr, + const struct in6_addr *mask, + struct netlbl_audit *audit_info) +{ + return -ENOSYS; +} static inline int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) { @@ -586,7 +678,7 @@ static inline void netlbl_cache_invalidate(void) { return; } -static inline int netlbl_cache_add(const struct sk_buff *skb, +static inline int netlbl_cache_add(const struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) { return 0; @@ -598,4 +690,7 @@ static inline struct audit_buffer *netlbl_audit_start(int type, } #endif /* CONFIG_NETLABEL */ +const struct netlbl_calipso_ops * +netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops); + #endif /* _NETLABEL_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 0e3172751755..254a0fc01800 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -98,14 +98,17 @@ * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb - * nla_put_u64(skb, type, value) add u64 attribute to skb + * nla_put_u64_64bits(skb, type, + * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb - * nla_put_s64(skb, type, value) add s64 attribute to skb + * nla_put_s64(skb, type, value, + * padattr) add s64 attribute to skb * nla_put_string(skb, type, str) add string attribute to skb * nla_put_flag(skb, type) add flag attribute to skb - * nla_put_msecs(skb, type, jiffies) add msecs attribute to skb + * nla_put_msecs(skb, type, jiffies, + * padattr) add msecs attribute to skb * nla_put_in_addr(skb, type, addr) add IPv4 address attribute to skb * nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb * @@ -244,13 +247,21 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, + int attrlen, int padattr); void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen); struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, + int attrlen, int padattr); void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen); void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr); void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, + const void *data, int padattr); int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_append(struct sk_buff *skb, int attrlen, const void *data); @@ -837,47 +848,56 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) } /** - * nla_put_u64 - Add a u64 netlink attribute to a socket buffer + * nla_put_u64_64bit - Add a u64 netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_u64(struct sk_buff *skb, int attrtype, u64 value) +static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, + u64 value, int padattr) { - return nla_put(skb, attrtype, sizeof(u64), &value); + return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); } /** - * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer + * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value) +static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, + int padattr) { - return nla_put(skb, attrtype, sizeof(__be64), &value); + return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); } /** - * nla_put_net64 - Add 64-bit network byte order netlink attribute to a socket buffer + * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value) +static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, + int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value); + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + padattr); } /** - * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer + * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value) +static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, + int padattr) { - return nla_put(skb, attrtype, sizeof(__le64), &value); + return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); } /** @@ -914,14 +934,16 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) } /** - * nla_put_s64 - Add a s64 netlink attribute to a socket buffer + * nla_put_s64 - Add a s64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value + * @padattr: attribute type for the padding */ -static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value) +static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, + int padattr) { - return nla_put(skb, attrtype, sizeof(s64), &value); + return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); } /** @@ -947,16 +969,18 @@ static inline int nla_put_flag(struct sk_buff *skb, int attrtype) } /** - * nla_put_msecs - Add a msecs netlink attribute to a socket buffer + * nla_put_msecs - Add a msecs netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @njiffies: number of jiffies to convert to msecs + * @padattr: attribute type for the padding */ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, - unsigned long njiffies) + unsigned long njiffies, int padattr) { u64 tmp = jiffies_to_msecs(njiffies); - return nla_put(skb, attrtype, sizeof(u64), &tmp); + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -1231,6 +1255,61 @@ static inline int nla_validate_nested(const struct nlattr *start, int maxtype, } /** + * nla_need_padding_for_64bit - test 64-bit alignment of the next attribute + * @skb: socket buffer the message is stored in + * + * Return true if padding is needed to align the next attribute (nla_data()) to + * a 64-bit aligned area. + */ +static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* The nlattr header is 4 bytes in size, that's why we test + * if the skb->data _is_ aligned. A NOP attribute, plus + * nlattr header for next attribute, will make nla_data() + * 8-byte aligned. + */ + if (IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8)) + return true; +#endif + return false; +} + +/** + * nla_align_64bit - 64-bit align the nla_data() of next attribute + * @skb: socket buffer the message is stored in + * @padattr: attribute type for the padding + * + * Conditionally emit a padding netlink attribute in order to make + * the next attribute we emit have a 64-bit aligned nla_data() area. + * This will only be done in architectures which do not have + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined. + * + * Returns zero on success or a negative error code. + */ +static inline int nla_align_64bit(struct sk_buff *skb, int padattr) +{ + if (nla_need_padding_for_64bit(skb) && + !nla_reserve(skb, padattr, 0)) + return -EMSGSIZE; + + return 0; +} + +/** + * nla_total_size_64bit - total length of attribute including padding + * @payload: length of payload + */ +static inline int nla_total_size_64bit(int payload) +{ + return NLA_ALIGN(nla_attr_size(payload)) +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + + NLA_ALIGN(nla_attr_size(0)) +#endif + ; +} + +/** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @head: head of attribute stream diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 723b61c82b3f..38b1a80517f0 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,7 +84,6 @@ struct netns_ct { struct ctl_table_header *event_sysctl_header; struct ctl_table_header *helper_sysctl_header; #endif - char *slabname; unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_events; int sysctl_acct; @@ -93,11 +92,6 @@ struct netns_ct { int sysctl_tstamp; int sysctl_checksum; - unsigned int htable_size; - seqcount_t generation; - struct kmem_cache *nf_conntrack_cachep; - struct hlist_nulls_head *hash; - struct hlist_head *expect_hash; struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; @@ -107,9 +101,5 @@ struct netns_ct { unsigned int labels_used; u8 label_words; #endif -#ifdef CONFIG_NF_NAT_NEEDED - struct hlist_head *nat_bysource; - unsigned int nat_htable_size; -#endif }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a69cde3ce460..d061ffeb1e71 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,9 @@ struct netns_ipv4 { struct fib_rules_ops *mr_rules_ops; #endif #endif +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int sysctl_fib_multipath_use_neigh; +#endif atomic_t rt_genid; }; #endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 38aa4983e2a9..36d723579af2 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -5,11 +5,13 @@ struct proc_dir_entry; struct nf_logger; +struct nf_queue_handler; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif + const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 730d82ad6ee5..24cd3949a9a4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -80,6 +80,7 @@ struct netns_xfrm { struct flow_cache flow_cache_global; atomic_t flow_cache_genid; struct list_head flow_cache_gc_list; + atomic_t flow_cache_gc_count; spinlock_t flow_cache_gc_lock; struct work_struct flow_cache_gc_work; struct work_struct flow_cache_flush_work; diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 0ae101eef0f4..74fa7eb94e72 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -220,12 +220,13 @@ struct nfc_digital_dev { struct list_head cmd_queue; struct mutex cmd_lock; - struct work_struct poll_work; + struct delayed_work poll_work; u8 curr_protocol; u8 curr_rf_tech; u8 curr_nfc_dep_pni; u8 did; + u16 dep_rwt; u8 local_payload_max; u8 remote_payload_max; @@ -237,7 +238,6 @@ struct nfc_digital_dev { int nack_count; struct sk_buff *saved_skb; - unsigned int saved_skb_len; u16 target_fsc; diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h index c25fbdee0d61..7ecb45757897 100644 --- a/include/net/nfc/llc.h +++ b/include/net/nfc/llc.h @@ -37,10 +37,6 @@ struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, int tx_tailroom, llc_failure_t llc_failure); void nfc_llc_free(struct nfc_llc *llc); -void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom, - int *rx_tailroom); - - int nfc_llc_start(struct nfc_llc *llc); int nfc_llc_stop(struct nfc_llc *llc); void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 57ce24fb0047..87499b6b35d6 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -109,7 +109,13 @@ struct nci_ops { struct nci_conn_info { struct list_head list; - __u8 id; /* can be an RF Discovery ID or an NFCEE ID */ + /* NCI specification 4.4.2 Connection Creation + * The combination of destination type and destination specific + * parameters shall uniquely identify a single destination for the + * Logical Connection + */ + struct dest_spec_params *dest_params; + __u8 dest_type; __u8 conn_id; __u8 max_pkt_payload_len; @@ -260,7 +266,9 @@ struct nci_dev { __u32 manufact_specific_info; /* Save RF Discovery ID or NFCEE ID under conn_create */ - __u8 cur_id; + struct dest_spec_params cur_params; + /* Save destination type under conn_create */ + __u8 cur_dest_type; /* stored during nci_data_exchange */ struct sk_buff *rx_data_reassembly; @@ -298,6 +306,8 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, size_t params_len, struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); +int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, + struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, @@ -378,7 +388,8 @@ void nci_clear_target_list(struct nci_dev *ndev); void nci_req_complete(struct nci_dev *ndev, int result); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id); -int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id); +int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, + struct dest_spec_params *params); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 32cb3e591e07..ddcee128f5d9 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -54,6 +54,8 @@ enum nl802154_commands { NL802154_CMD_SET_ACKREQ_DEFAULT, + NL802154_CMD_SET_WPAN_PHY_NETNS, + /* add new commands above here */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -124,6 +126,11 @@ enum nl802154_attrs { NL802154_ATTR_ACKREQ_DEFAULT, + NL802154_ATTR_PAD, + + NL802154_ATTR_PID, + NL802154_ATTR_NETNS_FD, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -295,6 +302,7 @@ enum nl802154_dev_addr_attrs { NL802154_DEV_ADDR_ATTR_MODE, NL802154_DEV_ADDR_ATTR_SHORT, NL802154_DEV_ADDR_ATTR_EXTENDED, + NL802154_DEV_ADDR_ATTR_PAD, /* keep last */ __NL802154_DEV_ADDR_ATTR_AFTER_LAST, @@ -320,6 +328,7 @@ enum nl802154_key_id_attrs { NL802154_KEY_ID_ATTR_IMPLICIT, NL802154_KEY_ID_ATTR_SOURCE_SHORT, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, + NL802154_KEY_ID_ATTR_PAD, /* keep last */ __NL802154_KEY_ID_ATTR_AFTER_LAST, @@ -402,6 +411,7 @@ enum nl802154_dev { NL802154_DEV_ATTR_EXTENDED_ADDR, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, NL802154_DEV_ATTR_KEY_MODE, + NL802154_DEV_ATTR_PAD, /* keep last */ __NL802154_DEV_ATTR_AFTER_LAST, @@ -414,6 +424,7 @@ enum nl802154_devkey { NL802154_DEVKEY_ATTR_FRAME_COUNTER, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, NL802154_DEVKEY_ATTR_ID, + NL802154_DEVKEY_ATTR_PAD, /* keep last */ __NL802154_DEVKEY_ATTR_AFTER_LAST, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index caa5e18636df..6f8d65342d3a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -392,19 +392,37 @@ struct tc_cls_u32_offload { }; }; -/* tca flags definitions */ -#define TCA_CLS_FLAGS_SKIP_HW 1 - -static inline bool tc_should_offload(struct net_device *dev, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) { + const struct Qdisc *sch = tp->q; + const struct Qdisc_class_ops *cops = sch->ops->cl_ops; + if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) return false; - if (!dev->netdev_ops->ndo_setup_tc) return false; + if (cops && cops->tcf_cl_offload) + return cops->tcf_cl_offload(tp->classid); + + return true; +} + +static inline bool tc_skip_sw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; +} + +/* SKIP_HW and SKIP_SW are mutually exclusive flags. */ +static inline bool tc_flags_valid(u32 flags) +{ + if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)) + return false; + + if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))) + return false; return true; } @@ -412,6 +430,7 @@ static inline bool tc_should_offload(struct net_device *dev, u32 flags) enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, + TC_CLSFLOWER_STATS, }; struct tc_cls_flower_offload { @@ -423,4 +442,15 @@ struct tc_cls_flower_offload { struct tcf_exts *exts; }; +enum tc_matchall_command { + TC_CLSMATCHALL_REPLACE, + TC_CLSMATCHALL_DESTROY, +}; + +struct tc_cls_matchall_offload { + enum tc_matchall_command command; + struct tcf_exts *exts; + unsigned long cookie; +}; + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 401038d2f9b8..7caa99b482c6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,17 +61,18 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { + u64 last_expires; struct hrtimer timer; struct Qdisc *qdisc; }; void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle); +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires); static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { - qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires), true); + qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires)); } void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); diff --git a/include/net/protocol.h b/include/net/protocol.h index da689f5432de..bf36ca34af7a 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -107,9 +107,6 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); -int udp_add_offload(struct net *net, struct udp_offload *prot); -void udp_del_offload(struct udp_offload *prot); - #if IS_ENABLED(CONFIG_IPV6) int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index f49759decb28..6ebe13eb1c4c 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, struct request_sock *req; req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - - if (req) { - req->rsk_ops = ops; - if (attach_listener) { - sock_hold(sk_listener); - req->rsk_listener = sk_listener; - } else { - req->rsk_listener = NULL; + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; } - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&req->rsk_refcnt, 0); + req->rsk_listener = sk_listener; } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + atomic_set(&req->rsk_refcnt, 0); + return req; } diff --git a/include/net/route.h b/include/net/route.h index 9b0a523bb428..ad777d79af94 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -209,6 +209,9 @@ unsigned int inet_addr_type_dev_table(struct net *net, void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); @@ -322,10 +325,11 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable static inline int inet_iif(const struct sk_buff *skb) { - int iif = skb_rtable(skb)->rt_iif; + struct rtable *rt = skb_rtable(skb); + + if (rt && rt->rt_iif) + return rt->rt_iif; - if (iif) - return iif; return skb->skb_iif; } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2f87c1ba13de..4113916cc1bb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -47,6 +47,9 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. * @get_link_net: Function to get the i/o netns of the device + * @get_linkxstats_size: Function to calculate the required room for + * dumping device-specific extended link stats + * @fill_linkxstats: Function to dump device-specific extended link stats */ struct rtnl_link_ops { struct list_head list; @@ -95,6 +98,11 @@ struct rtnl_link_ops { const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); + size_t (*get_linkxstats_size)(const struct net_device *dev, + int attr); + int (*fill_linkxstats)(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 46e55f0202a6..909aff2db2b3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -26,14 +26,6 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_THROTTLED, -}; - -/* - * following bits are only changed while qdisc lock is held - */ -enum qdisc___state_t { - __QDISC___STATE_RUNNING = 1, }; struct qdisc_size_table { @@ -45,8 +37,10 @@ struct qdisc_size_table { }; struct Qdisc { - int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); - struct sk_buff * (*dequeue)(struct Qdisc *dev); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); + struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 @@ -70,31 +64,25 @@ struct Qdisc { struct list_head list; u32 handle; u32 parent; - int (*reshape_fail)(struct sk_buff *skb, - struct Qdisc *q); - void *u32_node; - /* This field is deprecated, but it is still used by CBQ - * and it will live until better solution will be invented. - */ - struct Qdisc *__parent; struct netdev_queue *dev_queue; struct gnet_stats_rate_est64 rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; - struct Qdisc *next_sched; - struct sk_buff *gso_skb; /* * For performance sake on SMP, we put highly modified fields at the end */ - unsigned long state; + struct sk_buff *gso_skb ____cacheline_aligned_in_smp; struct sk_buff_head q; struct gnet_stats_basic_packed bstats; - unsigned int __state; + seqcount_t running; struct gnet_stats_queue qstats; + unsigned long state; + struct Qdisc *next_sched; + struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; atomic_t refcnt; @@ -104,20 +92,24 @@ struct Qdisc { static inline bool qdisc_is_running(const struct Qdisc *qdisc) { - return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false; + return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc_is_running(qdisc)) return false; - qdisc->__state |= __QDISC___STATE_RUNNING; + /* Variant of write_seqcount_begin() telling lockdep a trylock + * was attempted. + */ + raw_write_seqcount_begin(&qdisc->running); + seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) { - qdisc->__state &= ~__QDISC___STATE_RUNNING; + write_seqcount_end(&qdisc->running); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) @@ -135,21 +127,6 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) #endif } -static inline bool qdisc_is_throttled(const struct Qdisc *qdisc) -{ - return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false; -} - -static inline void qdisc_throttled(struct Qdisc *qdisc) -{ - set_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - -static inline void qdisc_unthrottled(struct Qdisc *qdisc) -{ - clear_bit(__QDISC_STATE_THROTTLED, &qdisc->state); -} - struct Qdisc_class_ops { /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); @@ -168,6 +145,7 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); @@ -185,10 +163,11 @@ struct Qdisc_ops { char id[IFNAMSIZ]; int priv_size; - int (*enqueue)(struct sk_buff *, struct Qdisc *); + int (*enqueue)(struct sk_buff *skb, + struct Qdisc *sch, + struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - unsigned int (*drop)(struct Qdisc *); int (*init)(struct Qdisc *, struct nlattr *arg); void (*reset)(struct Qdisc *); @@ -321,6 +300,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } +static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return &root->running; +} + static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; @@ -516,10 +503,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, #endif } -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); - return sch->enqueue(skb, sch); + return sch->enqueue(skb, sch, to_free); } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) @@ -527,11 +515,27 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) return q->flags & TCQ_F_CPUSTATS; } +static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, + __u64 bytes, __u32 packets) +{ + bstats->bytes += bytes; + bstats->packets += packets; +} + static inline void bstats_update(struct gnet_stats_basic_packed *bstats, const struct sk_buff *skb) { - bstats->bytes += qdisc_pkt_len(skb); - bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + _bstats_update(bstats, + qdisc_pkt_len(skb), + skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); +} + +static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, + __u64 bytes, __u32 packets) +{ + u64_stats_update_begin(&bstats->syncp); + _bstats_update(&bstats->bstats, bytes, packets); + u64_stats_update_end(&bstats->syncp); } static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, @@ -628,40 +632,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return __qdisc_dequeue_head(sch, &sch->q); } +/* Instead of calling kfree_skb() while root qdisc lock is held, + * queue the skb for future freeing at end of __dev_xmit_skb() + */ +static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) +{ + skb->next = *to_free; + *to_free = skb; +} + static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list) + struct sk_buff_head *list, + struct sk_buff **to_free) { struct sk_buff *skb = __skb_dequeue(list); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); + qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return len; } return 0; } -static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) +static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, + struct sk_buff **to_free) { - return __qdisc_queue_drop_head(sch, &sch->q); -} - -static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __skb_dequeue_tail(list); - - if (likely(skb != NULL)) - qdisc_qstats_backlog_dec(sch, skb); - - return skb; -} - -static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) -{ - return __qdisc_dequeue_tail(sch, &sch->q); + return __qdisc_queue_drop_head(sch, &sch->q, to_free); } static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) @@ -675,9 +675,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -690,6 +692,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); @@ -698,19 +701,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct Qdisc *sch, - struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct sk_buff_head *list) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - __skb_queue_purge(list); + if (!skb_queue_empty(list)) { + rtnl_kfree_skbs(list->next, list->prev); + __skb_queue_head_init(list); + } } static inline void qdisc_reset_queue(struct Qdisc *sch) { - __qdisc_reset_queue(sch, &sch->q); + __qdisc_reset_queue(&sch->q); sch->qstats.backlog = 0; } @@ -731,46 +736,19 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } -static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, - struct sk_buff_head *list) -{ - struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); - - if (likely(skb != NULL)) { - unsigned int len = qdisc_pkt_len(skb); - kfree_skb(skb); - return len; - } - - return 0; -} - -static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) -{ - return __qdisc_queue_drop(sch, &sch->q); -} - -static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); qdisc_qstats_drop(sch); - - return NET_XMIT_DROP; } -static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { + __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); -#ifdef CONFIG_NET_CLS_ACT - if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) - goto drop; - - return NET_XMIT_SUCCESS; - -drop: -#endif - kfree_skb(skb); return NET_XMIT_DROP; } diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8c337cd0e1e4..5b847e49f7e9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -214,7 +214,7 @@ typedef enum { SCTP_SS_LISTENING = TCP_LISTEN, SCTP_SS_ESTABLISHING = TCP_SYN_SENT, SCTP_SS_ESTABLISHED = TCP_ESTABLISHED, - SCTP_SS_CLOSING = TCP_CLOSING, + SCTP_SS_CLOSING = TCP_CLOSE_WAIT, } sctp_sock_state_t; /* These functions map various type to printable names. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65521cfdcade..632e205ca54b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -116,6 +116,22 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +int sctp_transport_walk_start(struct rhashtable_iter *iter); +void sctp_transport_walk_stop(struct rhashtable_iter *iter); +struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter); +struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, int pos); +int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p); +int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), + struct net *net, int pos, void *p); +int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); +int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info); + /* * sctp/primitive.c */ @@ -170,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); int sctp_remaddr_proc_init(struct net *net); void sctp_remaddr_proc_exit(struct net *net); +/* + * sctp/offload.c + */ +int sctp_offload_init(void); /* * Module global variables @@ -189,10 +209,9 @@ extern int sysctl_sctp_wmem[3]; */ /* SCTP SNMP MIB stats handlers */ -#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) -#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field) -#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) +#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) +#define __SCTP_INC_STATS(net, field) __SNMP_INC_STATS((net)->sctp.sctp_statistics, field) +#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) /* sctp mib definitions */ enum { @@ -359,21 +378,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp); #define sctp_skb_for_each(pos, head, tmp) \ skb_queue_walk_safe(head, pos, tmp) -/* A helper to append an entire skb list (list) to another (head). */ -static inline void sctp_skb_list_tail(struct sk_buff_head *list, - struct sk_buff_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&head->lock, flags); - spin_lock(&list->lock); - - skb_queue_splice_tail_init(list, head); - - spin_unlock(&list->lock); - spin_unlock_irqrestore(&head->lock, flags); -} - /** * sctp_list_dequeue - remove from the head of the queue * @list: list to dequeue from @@ -386,11 +390,9 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) { struct list_head *result = NULL; - if (list->next != list) { + if (!list_empty(list)) { result = list->next; - list->next = result->next; - list->next->prev = list; - INIT_LIST_HEAD(result); + list_del_init(result); } return result; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6df1ce7a411c..ce93c4b10d26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -59,6 +59,7 @@ #include <linux/workqueue.h> /* We need tq_struct. */ #include <linux/sctp.h> /* We need sctp* header structs. */ #include <net/sctp/auth.h> /* We need auth specific structs */ +#include <net/ip.h> /* For inet_skb_parm */ /* A convenience structure for handling sockaddr structures. * We should wean ourselves off this. @@ -210,14 +211,15 @@ struct sctp_sock { int user_frag; __u32 autoclose; - __u8 nodelay; - __u8 disable_fragments; - __u8 v4mapped; - __u8 frag_interleave; __u32 adaptation_ind; __u32 pd_point; - __u8 recvrcvinfo; - __u8 recvnxtinfo; + __u16 nodelay:1, + disable_fragments:1, + v4mapped:1, + frag_interleave:1, + recvrcvinfo:1, + recvnxtinfo:1, + data_ready_signalled:1; atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ @@ -565,6 +567,9 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that * case, we point at the "current" one, whatever that means @@ -598,6 +603,16 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; + /* We use this field to record param for prsctp policies, + * for TTL policy, it is the time_to_drop of this chunk, + * for RTX policy, it is the max_sent_count of this chunk, + * for PRIO policy, it is the priority of this chunk. + */ + unsigned long prsctp_param; + + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* Which association does this belong to? */ struct sctp_association *asoc; @@ -695,6 +710,8 @@ struct sctp_packet { size_t overhead; /* This is the total size of all chunks INCLUDING padding. */ size_t size; + /* This is the maximum size this packet may have */ + size_t max_size; /* The packet is destined for this transport address. * The function we finally use to pass down to the next lower @@ -847,6 +864,11 @@ struct sctp_transport { */ ktime_t last_time_heard; + /* When was the last time that we sent a chunk using this + * transport? We use this to check for idle transports + */ + unsigned long last_time_sent; + /* Last time(in jiffies) when cwnd is reduced due to the congestion * indication based on ECNE chunk. */ @@ -952,7 +974,8 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *, struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk); void sctp_transport_free(struct sctp_transport *); -void sctp_transport_reset_timers(struct sctp_transport *); +void sctp_transport_reset_t3_rtx(struct sctp_transport *); +void sctp_transport_reset_hb_timer(struct sctp_transport *); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1062,12 +1085,36 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { q->cork = 1; } +/* SCTP skb control block. + * sctp_input_cb is currently used on rx and sock rx queue + */ +struct sctp_input_cb { + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; + struct sctp_chunk *chunk; + struct sctp_af *af; +}; +#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) + +static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) +{ + const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + + return chunk->head_skb ? : skb; +} + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { @@ -1244,7 +1291,8 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable; + __u8 auth_enable:1, + prsctp_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1836,9 +1884,15 @@ struct sctp_association { __u16 active_key_id; __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ - temp:1; /* Is it a temporary association? */ + temp:1, /* Is it a temporary association? */ + prsctp_enable:1; struct sctp_priv_assoc_stats stats; + + int sent_cnt_removable; + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index cccdcfd14973..2c098cd7e7e2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,15 +48,15 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; - __u16 stream; - __u16 ssn; - __u16 flags; + struct sctp_chunk *chunk; + unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int msg_flags; - int iif; - unsigned int rmem_len; + __u16 stream; + __u16 ssn; + __u16 flags; + __u16 msg_flags; }; /* Retrieve the skb this event sits inside of. */ diff --git a/include/net/snmp.h b/include/net/snmp.h index 35512ac6dcfb..c9228ad7ee91 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -123,12 +123,9 @@ struct linux_xfrm_mib { #define DECLARE_SNMP_STAT(type, name) \ extern __typeof__(type) __percpu *name -#define SNMP_INC_STATS_BH(mib, field) \ +#define __SNMP_INC_STATS(mib, field) \ __this_cpu_inc(mib->mibs[field]) -#define SNMP_INC_STATS_USER(mib, field) \ - this_cpu_inc(mib->mibs[field]) - #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \ atomic_long_inc(&mib->mibs[field]) @@ -138,12 +135,9 @@ struct linux_xfrm_mib { #define SNMP_DEC_STATS(mib, field) \ this_cpu_dec(mib->mibs[field]) -#define SNMP_ADD_STATS_BH(mib, field, addend) \ +#define __SNMP_ADD_STATS(mib, field, addend) \ __this_cpu_add(mib->mibs[field], addend) -#define SNMP_ADD_STATS_USER(mib, field, addend) \ - this_cpu_add(mib->mibs[field], addend) - #define SNMP_ADD_STATS(mib, field, addend) \ this_cpu_add(mib->mibs[field], addend) #define SNMP_UPD_PO_STATS(mib, basefield, addend) \ @@ -152,7 +146,7 @@ struct linux_xfrm_mib { this_cpu_inc(ptr[basefield##PKTS]); \ this_cpu_add(ptr[basefield##OCTETS], addend); \ } while (0) -#define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \ +#define __SNMP_UPD_PO_STATS(mib, basefield, addend) \ do { \ __typeof__((mib->mibs) + 0) ptr = mib->mibs; \ __this_cpu_inc(ptr[basefield##PKTS]); \ @@ -162,7 +156,7 @@ struct linux_xfrm_mib { #if BITS_PER_LONG==32 -#define SNMP_ADD_STATS64_BH(mib, field, addend) \ +#define __SNMP_ADD_STATS64(mib, field, addend) \ do { \ __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ @@ -170,20 +164,16 @@ struct linux_xfrm_mib { u64_stats_update_end(&ptr->syncp); \ } while (0) -#define SNMP_ADD_STATS64_USER(mib, field, addend) \ +#define SNMP_ADD_STATS64(mib, field, addend) \ do { \ local_bh_disable(); \ - SNMP_ADD_STATS64_BH(mib, field, addend); \ - local_bh_enable(); \ + __SNMP_ADD_STATS64(mib, field, addend); \ + local_bh_enable(); \ } while (0) -#define SNMP_ADD_STATS64(mib, field, addend) \ - SNMP_ADD_STATS64_USER(mib, field, addend) - -#define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1) -#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1) +#define __SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) -#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) \ +#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) \ do { \ __typeof__(*mib) *ptr; \ ptr = raw_cpu_ptr((mib)); \ @@ -195,19 +185,17 @@ struct linux_xfrm_mib { #define SNMP_UPD_PO_STATS64(mib, basefield, addend) \ do { \ local_bh_disable(); \ - SNMP_UPD_PO_STATS64_BH(mib, basefield, addend); \ - local_bh_enable(); \ + __SNMP_UPD_PO_STATS64(mib, basefield, addend); \ + local_bh_enable(); \ } while (0) #else -#define SNMP_INC_STATS64_BH(mib, field) SNMP_INC_STATS_BH(mib, field) -#define SNMP_INC_STATS64_USER(mib, field) SNMP_INC_STATS_USER(mib, field) +#define __SNMP_INC_STATS64(mib, field) __SNMP_INC_STATS(mib, field) #define SNMP_INC_STATS64(mib, field) SNMP_INC_STATS(mib, field) #define SNMP_DEC_STATS64(mib, field) SNMP_DEC_STATS(mib, field) -#define SNMP_ADD_STATS64_BH(mib, field, addend) SNMP_ADD_STATS_BH(mib, field, addend) -#define SNMP_ADD_STATS64_USER(mib, field, addend) SNMP_ADD_STATS_USER(mib, field, addend) +#define __SNMP_ADD_STATS64(mib, field, addend) __SNMP_ADD_STATS(mib, field, addend) #define SNMP_ADD_STATS64(mib, field, addend) SNMP_ADD_STATS(mib, field, addend) #define SNMP_UPD_PO_STATS64(mib, basefield, addend) SNMP_UPD_PO_STATS(mib, basefield, addend) -#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) SNMP_UPD_PO_STATS_BH(mib, basefield, addend) +#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) __SNMP_UPD_PO_STATS(mib, basefield, addend) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index 255d3e03727b..ff5be7e8ddea 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -178,7 +178,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -382,8 +382,13 @@ struct sock { atomic_t sk_omem_alloc; int sk_sndbuf; struct sk_buff_head sk_write_queue; + + /* + * Because of non atomicity rules, all + * changes are protected by socket lock. + */ kmemcheck_bitfield_begin(flags); - unsigned int sk_shutdown : 2, + unsigned int sk_padding : 2, sk_no_check_tx : 1, sk_no_check_rx : 1, sk_userlocks : 4, @@ -391,6 +396,7 @@ struct sock { sk_type : 16; #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); + int sk_wmem_queued; gfp_t sk_allocation; u32 sk_pacing_rate; /* bytes per second */ @@ -418,6 +424,7 @@ struct sock { struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; + u8 sk_shutdown; u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; @@ -438,6 +445,7 @@ struct sock { struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; + struct rcu_head sk_rcu; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) @@ -456,28 +464,32 @@ struct sock { #define SK_CAN_REUSE 1 #define SK_FORCE_REUSE 2 +int sk_set_peek_off(struct sock *sk, int val); + static inline int sk_peek_offset(struct sock *sk, int flags) { - if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0)) - return sk->sk_peek_off; - else - return 0; + if (unlikely(flags & MSG_PEEK)) { + s32 off = READ_ONCE(sk->sk_peek_off); + if (off >= 0) + return off; + } + + return 0; } static inline void sk_peek_offset_bwd(struct sock *sk, int val) { - if (sk->sk_peek_off >= 0) { - if (sk->sk_peek_off >= val) - sk->sk_peek_off -= val; - else - sk->sk_peek_off = 0; + s32 off = READ_ONCE(sk->sk_peek_off); + + if (unlikely(off >= 0)) { + off = max_t(s32, off - val, 0); + WRITE_ONCE(sk->sk_peek_off, off); } } static inline void sk_peek_offset_fwd(struct sock *sk, int val) { - if (sk->sk_peek_off >= 0) - sk->sk_peek_off += val; + sk_peek_offset_bwd(sk, -val); } /* @@ -564,7 +576,7 @@ static inline bool __sk_del_node_init(struct sock *sk) modifications. */ -static inline void sock_hold(struct sock *sk) +static __always_inline void sock_hold(struct sock *sk) { atomic_inc(&sk->sk_refcnt); } @@ -572,7 +584,7 @@ static inline void sock_hold(struct sock *sk) /* Ungrab socket in the context, which assumes that socket refcnt cannot hit zero, f.e. it is true in context of any socketcall. */ -static inline void __sock_put(struct sock *sk) +static __always_inline void __sock_put(struct sock *sk) { atomic_dec(&sk->sk_refcnt); } @@ -625,12 +637,20 @@ static inline void sk_add_node(struct sock *sk, struct hlist_head *list) static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) { sock_hold(sk); - hlist_add_head_rcu(&sk->sk_node, list); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&sk->sk_node, list); + else + hlist_add_head_rcu(&sk->sk_node, list); } static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); + else + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) @@ -669,18 +689,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -720,6 +740,7 @@ enum sock_flags { */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ + SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -912,6 +933,17 @@ void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); +void __sk_flush_backlog(struct sock *sk); + +static inline bool sk_flush_backlog(struct sock *sk) +{ + if (unlikely(READ_ONCE(sk->sk_backlog.tail))) { + __sk_flush_backlog(sk); + return true; + } + return false; +} + int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; @@ -1310,24 +1342,14 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -/* Used by processes to "lock" a socket state, so that - * interrupts and bottom half handlers won't change it - * from under us. It essentially blocks any incoming - * packets, so that we won't get any new data or any - * packets that change the state of the socket. - * - * While locked, BH processing will add new packets to - * the backlog queue. This queue is processed by the - * owner of the socket lock right before it is released. - * - * Since ~2.3.5 it is also exclusive sleep lock serializing - * accesses from user process context. - */ -#define sock_owned_by_user(sk) ((sk)->sk_lock.owned) - static inline void sock_release_ownership(struct sock *sk) { - sk->sk_lock.owned = 0; + if (sk->sk_lock.owned) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + } } /* @@ -1349,6 +1371,16 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) +#ifdef CONFIG_LOCKDEP +static inline bool lockdep_sock_is_held(const struct sock *csk) +{ + struct sock *sk = (struct sock *)csk; + + return lockdep_is_held(&sk->sk_lock) || + lockdep_is_held(&sk->sk_lock.slock); +} +#endif + void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) @@ -1382,6 +1414,40 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) spin_unlock_bh(&sk->sk_lock.slock); } +/* Used by processes to "lock" a socket state, so that + * interrupts and bottom half handlers won't change it + * from under us. It essentially blocks any incoming + * packets, so that we won't get any new data or any + * packets that change the state of the socket. + * + * While locked, BH processing will add new packets to + * the backlog queue. This queue is processed by the + * owner of the socket lock right before it is released. + * + * Since ~2.3.5 it is also exclusive sleep lock serializing + * accesses from user process context. + */ + +static inline void sock_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_sock_is_held(sk) && debug_locks); +#endif +} + +static inline bool sock_owned_by_user(const struct sock *sk) +{ + sock_owned_by_me(sk); + return sk->sk_lock.owned; +} + +/* no reclassification while locks are held */ +static inline bool sock_allow_reclassification(const struct sock *csk) +{ + struct sock *sk = (struct sock *)csk; + + return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock); +} struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); @@ -1391,6 +1457,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); +void __sock_wfree(struct sk_buff *skb); void sock_wfree(struct sk_buff *skb); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); @@ -1418,8 +1485,11 @@ void sk_send_sigurg(struct sock *sk); struct sockcm_cookie { u32 mark; + u16 tsflags; }; +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); @@ -1506,7 +1576,13 @@ static inline void sock_put(struct sock *sk) */ void sock_gen_put(struct sock *sk); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, + unsigned int trim_cap); +static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, + const int nested) +{ + return __sk_receive_skb(sk, skb, nested, 1); +} static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { @@ -1584,8 +1660,8 @@ static inline void sk_rethink_txhash(struct sock *sk) static inline struct dst_entry * __sk_dst_get(struct sock *sk) { - return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + return rcu_dereference_check(sk->sk_dst_cache, + lockdep_sock_is_held(sk)); } static inline struct dst_entry * @@ -1857,6 +1933,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); @@ -1893,11 +1970,19 @@ static inline unsigned long sock_wspace(struct sock *sk) */ static inline void sk_set_bit(int nr, struct sock *sk) { + if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && + !sock_flag(sk, SOCK_FASYNC)) + return; + set_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_clear_bit(int nr, struct sock *sk) { + if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && + !sock_flag(sk, SOCK_FASYNC)) + return; + clear_bit(nr, &sk->sk_wq_raw->flags); } @@ -2007,6 +2092,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); } +static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +{ + int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + + atomic_add(segs, &sk->sk_drops); +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, @@ -2054,19 +2146,21 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, sk->sk_stamp = skb->tstamp; } -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet + * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * * Note : callers should take care of initial *tx_flags value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, + __u8 *tx_flags) { - if (unlikely(sk->sk_tsflags)) - __sock_tx_timestamp(sk, tx_flags); + if (unlikely(tsflags)) + __sock_tx_timestamp(tsflags, tx_flags); if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d451122e8404..62f6a967a1b7 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -54,11 +54,13 @@ struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ - u32 ageing_time; /* BRIDGE_AGEING_TIME */ + clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ } u; }; @@ -75,6 +77,8 @@ struct switchdev_obj { struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); }; /* SWITCHDEV_OBJ_ID_PORT_VLAN */ @@ -93,7 +97,7 @@ struct switchdev_obj_ipv4_fib { struct switchdev_obj obj; u32 dst; int dst_len; - struct fib_info fi; + struct fib_info *fi; u8 tos; u8 type; u32 nlflags; @@ -223,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b); #else static inline void switchdev_deferred_process(void) @@ -347,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev, { } +static inline bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + return false; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 958d69cfb19c..2b94673a3dbc 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -14,7 +14,7 @@ #include <net/act_api.h> struct tcf_bpf { - struct tcf_common common; + struct tc_action common; struct bpf_prog __rcu *filter; union { u32 bpf_fd; @@ -23,7 +23,6 @@ struct tcf_bpf { struct sock_filter *bpf_ops; const char *bpf_name; }; -#define to_bpf(a) \ - container_of(a->priv, struct tcf_bpf, common) +#define to_bpf(a) ((struct tcf_bpf *)a) #endif /* __NET_TC_BPF_H */ diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 02caa406611b..59b515d32bb4 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -4,12 +4,11 @@ #include <net/act_api.h> struct tcf_connmark_info { - struct tcf_common common; + struct tc_action common; struct net *net; u16 zone; }; -#define to_connmark(a) \ - container_of(a->priv, struct tcf_connmark_info, common) +#define to_connmark(a) ((struct tcf_connmark_info *)a) #endif /* __NET_TC_CONNMARK_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index fa8f5fac65e9..f31fb6331a53 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -5,11 +5,10 @@ #include <net/act_api.h> struct tcf_csum { - struct tcf_common common; + struct tc_action common; u32 update_flags; }; -#define to_tcf_csum(a) \ - container_of(a->priv,struct tcf_csum,common) +#define to_tcf_csum(a) ((struct tcf_csum *)a) #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index 9763dcbb9bc3..d47f040a3bdf 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -4,11 +4,10 @@ #include <net/act_api.h> struct tcf_defact { - struct tcf_common common; - u32 tcfd_datalen; - void *tcfd_defdata; + struct tc_action common; + u32 tcfd_datalen; + void *tcfd_defdata; }; -#define to_defact(a) \ - container_of(a->priv, struct tcf_defact, common) +#define to_defact(a) ((struct tcf_defact *)a) #endif /* __NET_TC_DEF_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 93c520b83d10..b6f173910226 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -5,7 +5,7 @@ #include <linux/tc_act/tc_gact.h> struct tcf_gact { - struct tcf_common common; + struct tc_action common; #ifdef CONFIG_GACT_PROB u16 tcfg_ptype; u16 tcfg_pval; @@ -13,8 +13,7 @@ struct tcf_gact { atomic_t packets; #endif }; -#define to_gact(a) \ - container_of(a->priv, struct tcf_gact, common) +#define to_gact(a) ((struct tcf_gact *)a) static inline bool is_tcf_gact_shot(const struct tc_action *a) { @@ -24,7 +23,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) if (a->ops && a->ops->type != TCA_ACT_GACT) return false; - gact = a->priv; + gact = to_gact(a); if (gact->tcf_action == TC_ACT_SHOT) return true; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index dc9a09aefb33..5164bd7a38fb 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -8,7 +8,7 @@ #define IFE_METAHDRLEN 2 struct tcf_ife_info { - struct tcf_common common; + struct tc_action common; u8 eth_dst[ETH_ALEN]; u8 eth_src[ETH_ALEN]; u16 eth_type; @@ -16,8 +16,7 @@ struct tcf_ife_info { /* list of metaids allowed */ struct list_head metalist; }; -#define to_ife(a) \ - container_of(a->priv, struct tcf_ife_info, common) +#define to_ife(a) ((struct tcf_ife_info *)a) struct tcf_meta_info { const struct tcf_meta_ops *ops; @@ -36,7 +35,7 @@ struct tcf_meta_ops { int (*encode)(struct sk_buff *, void *, struct tcf_meta_info *); int (*decode)(struct sk_buff *, void *, u16 len); int (*get)(struct sk_buff *skb, struct tcf_meta_info *mi); - int (*alloc)(struct tcf_meta_info *, void *); + int (*alloc)(struct tcf_meta_info *, void *, gfp_t); void (*release)(struct tcf_meta_info *); int (*validate)(void *val, int len); struct module *owner; @@ -48,8 +47,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval); +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index c0f4193f432c..31309766e379 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -6,12 +6,11 @@ struct xt_entry_target; struct tcf_ipt { - struct tcf_common common; + struct tc_action common; u32 tcfi_hook; char *tcfi_tname; struct xt_entry_target *tcfi_t; }; -#define to_ipt(a) \ - container_of(a->priv, struct tcf_ipt, common) +#define to_ipt(a) ((struct tcf_ipt *)a) #endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index dae96bae1c19..62770add15bd 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -2,16 +2,39 @@ #define __NET_TC_MIR_H #include <net/act_api.h> +#include <linux/tc_act/tc_mirred.h> struct tcf_mirred { - struct tcf_common common; + struct tc_action common; int tcfm_eaction; int tcfm_ifindex; int tcfm_ok_push; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; -#define to_mirred(a) \ - container_of(a->priv, struct tcf_mirred, common) +#define to_mirred(a) ((struct tcf_mirred *)a) + +static inline bool is_tcf_mirred_redirect(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR; +#endif + return false; +} + +static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_MIRRED) + return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; +#endif + return false; +} + +static inline int tcf_mirred_ifindex(const struct tc_action *a) +{ + return to_mirred(a)->tcfm_ifindex; +} #endif /* __NET_TC_MIR_H */ diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index 63d8e9ca9d99..56681a320612 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -5,7 +5,7 @@ #include <net/act_api.h> struct tcf_nat { - struct tcf_common common; + struct tc_action common; __be32 old_addr; __be32 new_addr; @@ -13,9 +13,6 @@ struct tcf_nat { u32 flags; }; -static inline struct tcf_nat *to_tcf_nat(struct tc_action *a) -{ - return container_of(a->priv, struct tcf_nat, common); -} +#define to_tcf_nat(a) ((struct tcf_nat *)a) #endif /* __NET_TC_NAT_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 5b80998879c7..29e38d6823df 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -4,12 +4,11 @@ #include <net/act_api.h> struct tcf_pedit { - struct tcf_common common; + struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; }; -#define to_pedit(a) \ - container_of(a->priv, struct tcf_pedit, common) +#define to_pedit(a) ((struct tcf_pedit *)a) #endif /* __NET_TC_PED_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b496d5ad7d42..5767e9dbcf92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -23,15 +23,14 @@ #include <linux/tc_act/tc_skbedit.h> struct tcf_skbedit { - struct tcf_common common; - u32 flags; - u32 priority; - u32 mark; - u16 queue_mapping; - /* XXX: 16-bit pad here? */ + struct tc_action common; + u32 flags; + u32 priority; + u32 mark; + u16 queue_mapping; + u16 ptype; }; -#define to_skbedit(a) \ - container_of(a->priv, struct tcf_skbedit, common) +#define to_skbedit(a) ((struct tcf_skbedit *)a) /* Return true iff action is mark */ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 93b70ade1ff3..e29f52e8bdf1 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -16,12 +16,11 @@ #define VLAN_F_PUSH 0x2 struct tcf_vlan { - struct tcf_common common; + struct tc_action common; int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; }; -#define to_vlan(a) \ - container_of(a->priv, struct tcf_vlan, common) +#define to_vlan(a) ((struct tcf_vlan *)a) #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b91370f61be6..c00e7d51bb18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -332,9 +332,8 @@ bool tcp_check_oom(struct sock *sk, int shift); extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) -#define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) +#define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) -#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) void tcp_tasklet_init(void); @@ -452,10 +451,15 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int tcp_connect(struct sock *sk); +enum tcp_synack_type { + TCP_SYNACK_NORMAL, + TCP_SYNACK_FASTOPEN, + TCP_SYNACK_COOKIE, +}; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req); + enum tcp_synack_type synack_type); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); @@ -533,8 +537,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); -int __tcp_retransmit_skb(struct sock *, struct sk_buff *); -int tcp_retransmit_skb(struct sock *, struct sk_buff *); +int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); +int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); @@ -552,6 +556,8 @@ void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); +void tcp_skb_collapse_tstamp(struct sk_buff *skb, + const struct sk_buff *next_skb); /* tcp_input.c */ void tcp_resume_early_retransmit(struct sock *sk); @@ -583,7 +589,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) * On the other hand, for extremely large MSS devices, handling * smaller than MSS windows in this way does make sense. */ - if (tp->max_window >= 512) + if (tp->max_window > TCP_MSS_DEFAULT) cutoff = (tp->max_window >> 1); else cutoff = tp->max_window; @@ -754,14 +760,22 @@ struct tcp_skb_cb { TCPCB_REPAIRED) __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ - /* 1 byte hole */ + __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ + eor:1, /* Is skb MSG_EOR marked? */ + unused:6; __u32 ack_seq; /* Sequence number ACK'd */ union { - struct inet_skb_parm h4; + struct { + /* There is space for up to 20 bytes */ + __u32 in_flight;/* Bytes in flight when packet sent */ + } tx; /* only used for outgoing skbs */ + union { + struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; + struct inet6_skb_parm h6; #endif - } header; /* For incoming frames */ + } header; /* For incoming skbs */ + }; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) @@ -773,7 +787,9 @@ struct tcp_skb_cb { */ static inline int tcp_v6_iif(const struct sk_buff *skb) { - return TCP_SKB_CB(skb)->header.h6.iif; + bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags); + + return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } #endif @@ -801,6 +817,11 @@ static inline int tcp_skb_mss(const struct sk_buff *skb) return TCP_SKB_CB(skb)->tcp_gso_size; } +static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) +{ + return likely(!TCP_SKB_CB(skb)->eor); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ @@ -836,6 +857,12 @@ enum tcp_ca_ack_event_flags { union tcp_cc_info; +struct ack_sample { + u32 pkts_acked; + s32 rtt_us; + u32 in_flight; +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -859,7 +886,7 @@ struct tcp_congestion_ops { /* new value of cwnd after loss (optional) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us); + void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); @@ -1039,17 +1066,6 @@ static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp) return 3; } -/* Slow start with delack produces 3 packets of burst, so that - * it is safe "de facto". This will be the default - same as - * the default reordering threshold - but if reordering increases, - * we must be able to allow cwnd to burst at least this much in order - * to not pull it back when holes are filled. - */ -static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) -{ - return tp->reordering; -} - /* Returns end sequence number of the receiver's advertised window */ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) { @@ -1301,10 +1317,10 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ - TCP_ADD_STATS_USER(net, TCP_MIB_RTOALGORITHM, 1); - TCP_ADD_STATS_USER(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); - TCP_ADD_STATS_USER(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); - TCP_ADD_STATS_USER(net, TCP_MIB_MAXCONN, -1); + TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1); + TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ); + TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ); + TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1); } /* from STCP */ @@ -1368,7 +1384,7 @@ union tcp_md5sum_block { /* - pool: digest algorithm, hash description and scratch buffer */ struct tcp_md5sig_pool { struct ahash_request *md5_req; - union tcp_md5sum_block md5_blk; + void *scratch; }; /* - functions */ @@ -1404,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void) local_bh_enable(); } -int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned int header_len); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, @@ -1738,7 +1753,7 @@ struct tcp_request_sock_ops { int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, - bool attach_req); + enum tcp_synack_type synack_type); }; #ifdef CONFIG_SYN_COOKIES @@ -1747,7 +1762,7 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, __u16 *mss) { tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); return ops->cookie_init_seq(skb, mss); } #else @@ -1846,4 +1861,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) tp->data_segs_in += segs_in; } +/* + * TCP listen path runs lockless. + * We forced "struct sock" to be const qualified to make sure + * we don't modify one of its field by mistake. + * Here, we increment sk_drops which is an atomic_t, so we can safely + * make sock writable again. + */ +static inline void tcp_listendrop(const struct sock *sk) +{ + atomic_inc(&((struct sock *)sk)->sk_drops); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); +} + #endif /* _TCP_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index b927413dde86..276f9760ab56 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -41,8 +41,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, - struct flowi6 *fl6, struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag); + struct flowi6 *fl6, struct ipcm6_cookie *ipc6, + struct sockcm_cookie *sockc); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket); diff --git a/include/net/udp.h b/include/net/udp.h index 92927f729ac8..8894d7144189 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -59,7 +59,7 @@ struct udp_skb_cb { * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_nulls_head head; + struct hlist_head head; int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); @@ -158,9 +158,21 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr, void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); +static inline void udp_csum_pull_header(struct sk_buff *skb) +{ + if (!skb->csum_valid && skb->ip_summed == CHECKSUM_NONE) + skb->csum = csum_partial(skb->data, sizeof(struct udphdr), + skb->csum); + skb_pull_rcsum(skb, sizeof(struct udphdr)); + UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); +} + +typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, + __be16 dport); + struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, - struct udphdr *uh); -int udp_gro_complete(struct sk_buff *skb, int nhoff); + struct udphdr *uh, udp_lookup_t lookup); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { @@ -260,6 +272,8 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *tbl, struct sk_buff *skb); +struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -269,36 +283,38 @@ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *tbl, struct sk_buff *skb); +struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport); /* * SNMP statistics for UDP and UDP-Lite */ -#define UDP_INC_STATS_USER(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_USER((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_statistics, field); } while(0) -#define UDP_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) - -#define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\ - else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ +#define UDP_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) +#define __UDP_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) + +#define __UDP6_INC_STATS(net, field, is_udplite) do { \ + if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ + else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) -#define UDP6_INC_STATS_USER(net, field, __lite) do { \ - if (__lite) SNMP_INC_STATS_USER((net)->mib.udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +#define UDP6_INC_STATS(net, field, __lite) do { \ + if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ + else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define UDPX_INC_STATS_BH(sk, field) \ +#define __UDPX_INC_STATS(sk, field) \ do { \ if ((sk)->sk_family == AF_INET) \ - UDP_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP_INC_STATS(sock_net(sk), field, 0); \ else \ - UDP6_INC_STATS_BH(sock_net(sk), field, 0); \ + __UDP6_INC_STATS(sock_net(sk), field, 0); \ } while (0) #else -#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0) +#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) #endif /* /proc */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index b83114077cee..02c5be037451 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -64,6 +64,11 @@ static inline int udp_sock_create(struct net *net, typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); +typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb); +typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, + int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ @@ -71,12 +76,54 @@ struct udp_tunnel_sock_cfg { __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_destroy_t encap_destroy; + udp_tunnel_gro_receive_t gro_receive; + udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); +/* -- List of parsable UDP tunnel types -- + * + * Adding to this list will result in serious debate. The main issue is + * that this list is essentially a list of workarounds for either poorly + * designed tunnels, or poorly designed device offloads. + * + * The parsing supported via these types should really be used for Rx + * traffic only as the network stack will have already inserted offsets for + * the location of the headers in the skb. In addition any ports that are + * pushed should be kept within the namespace without leaking to other + * devices such as VFs or other ports on the same device. + * + * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the + * need to use this for Rx checksum offload. It should not be necessary to + * call this function to perform Tx offloads on outgoing traffic. + */ +enum udp_parsable_tunnel_type { + UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */ + UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */ + UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */ +}; + +struct udp_tunnel_info { + unsigned short type; + sa_family_t sa_family; + __be16 port; +}; + +/* Notify network devices of offloadable types */ +void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); +void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); +void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); + +static inline void udp_tunnel_get_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, @@ -98,22 +145,14 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); -static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, - bool udp_csum) +#ifdef CONFIG_INET +static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } - -static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) -{ - struct udphdr *uh; - - uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); - skb_shinfo(skb)->gso_type |= uh->check ? - SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; -} +#endif static inline void udp_tunnel_encap_enable(struct socket *sock) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 73ed2e951c02..b96d0360c095 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -1,13 +1,10 @@ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 -#include <linux/ip.h> -#include <linux/ipv6.h> #include <linux/if_vlan.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/udp.h> +#include <net/udp_tunnel.h> #include <net/dst_metadata.h> +#include <net/udp_tunnel.h> /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -119,6 +116,64 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +/* + * VXLAN Generic Protocol Extension (VXLAN_F_GPE): + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Ver = Version. Indicates VXLAN GPE protocol version. + * + * P = Next Protocol Bit. The P bit is set to indicate that the + * Next Protocol field is present. + * + * O = OAM Flag Bit. The O bit is set to indicate that the packet + * is an OAM packet. + * + * Next Protocol = This 8 bit field indicates the protocol header + * immediately following the VXLAN GPE header. + * + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 + */ + +struct vxlanhdr_gpe { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 oam_flag:1, + reserved_flags1:1, + np_applied:1, + instance_applied:1, + version:2, +reserved_flags2:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved_flags2:2, + version:2, + instance_applied:1, + np_applied:1, + reserved_flags1:1, + oam_flag:1; +#endif + u8 reserved_flags3; + u8 reserved_flags4; + u8 next_protocol; + __be32 vx_vni; +}; + +/* VXLAN-GPE header flags. */ +#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28)) +#define VXLAN_HF_NP cpu_to_be32(BIT(26)) +#define VXLAN_HF_OAM cpu_to_be32(BIT(24)) + +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ + cpu_to_be32(0xff)) + +/* VXLAN-GPE header Next Protocol. */ +#define VXLAN_GPE_NP_IPV4 0x01 +#define VXLAN_GPE_NP_IPV6 0x02 +#define VXLAN_GPE_NP_ETHERNET 0x03 +#define VXLAN_GPE_NP_NSH 0x04 + struct vxlan_metadata { u32 gbp; }; @@ -126,12 +181,9 @@ struct vxlan_metadata { /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; - struct work_struct del_work; struct socket *sock; - struct rcu_head rcu; struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; - struct udp_offload udp_offloads; u32 flags; }; @@ -206,16 +258,26 @@ struct vxlan_dev { #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 +#define VXLAN_F_GPE 0x4000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA) +/* Flags that can be set together with VXLAN_F_GPE. */ +#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ + VXLAN_F_IPV6 | \ + VXLAN_F_UDP_ZERO_CSUM_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_COLLECT_METADATA) + struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); @@ -252,7 +314,9 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, (skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + sizeof(struct udphdr) + sizeof(struct vxlanhdr)) || + (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto)))) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; @@ -325,14 +389,6 @@ static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) return vni_field; } -#if IS_ENABLED(CONFIG_VXLAN) -void vxlan_get_rx_port(struct net_device *netdev); -#else -static inline void vxlan_get_rx_port(struct net_device *netdev) -{ -} -#endif - static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) { return vs->sock->sk->sk_family; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d6f6e5006ee9..adfebd6f243c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -45,12 +45,8 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) -#define XFRM_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field) -#define XFRM_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)-mib.xfrm_statistics, field) #else #define XFRM_INC_STATS(net, field) ((void)(net)) -#define XFRM_INC_STATS_BH(net, field) ((void)(net)) -#define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif |