diff options
Diffstat (limited to 'include/net')
126 files changed, 3546 insertions, 1568 deletions
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 5ab4c9901ccc..a71378007e61 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -198,6 +198,21 @@ static inline void lowpan_iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, ipaddr->s6_addr[8] ^= 0x02; } +static inline void lowpan_iphc_uncompress_eui48_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXff:feXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, 3); + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + memcpy(&ipaddr->s6_addr[13], lladdr + 3, 3); +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 27dfe85772b1..b8eb51a661e5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -402,10 +402,10 @@ struct p9_wstat { u32 atime; u32 mtime; u64 length; - char *name; - char *uid; - char *gid; - char *muid; + const char *name; + const char *uid; + const char *gid; + const char *muid; char *extension; /* 9p2000.u extensions */ kuid_t n_uid; /* 9p2000.u extensions */ kgid_t n_gid; /* 9p2000.u extensions */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index c6b97e58cf84..7af9d769b97d 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -157,6 +157,18 @@ struct p9_client { enum p9_trans_status status; void *trans; + union { + struct { + int rfd; + int wfd; + } fd; + struct { + u16 port; + bool privport; + + } tcp; + } trans_opts; + struct p9_idpool *fidpool; struct list_head fidlist; @@ -213,6 +225,7 @@ struct p9_dirent { struct iov_iter; +int p9_show_client_options(struct seq_file *m, struct p9_client *clnt); int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, const char *name); @@ -223,16 +236,16 @@ void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname); + const char *uname, kuid_t n_uname, const char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone); + const unsigned char * const *wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension); -int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); -int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, kgid_t gid, - struct p9_qid *qid); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname); +int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname, + kgid_t gid, struct p9_qid *qid); +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); @@ -250,9 +263,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); -int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 5122b5e40f78..1625fb842ac4 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -62,6 +62,7 @@ struct p9_trans_module { int (*cancelled)(struct p9_client *, struct p9_req_t *req); int (*zc_request)(struct p9_client *, struct p9_req_t *, struct iov_iter *, struct iov_iter *, int , int, int); + int (*show_options)(struct seq_file *, struct p9_client *); }; void v9fs_register_trans(struct p9_trans_module *m); diff --git a/include/net/act_api.h b/include/net/act_api.h index 1d716449209e..26ffd8333f50 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -41,6 +41,8 @@ struct tc_action { struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; + struct tc_cookie *act_cookie; + struct tcf_chain *goto_chain; }; #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index @@ -179,12 +181,12 @@ int tcf_unregister_action(struct tc_action_ops *a, int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind, struct list_head *); -struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind); +int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, int bind, + struct list_head *actions); +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8f998afc1384..6df79e96a780 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -20,6 +20,8 @@ #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) +#define ADDRCONF_NOTIFY_PRIORITY 0 + #include <linux/in.h> #include <linux/in6.h> @@ -46,11 +48,15 @@ struct prefix_info { struct in6_addr prefix; }; - #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> +struct in6_validator_info { + struct in6_addr i6vi_addr; + struct inet6_dev *i6vi_dev; +}; + #define IN6_ADDR_HSIZE_SHIFT 4 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) @@ -88,9 +94,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, +int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); @@ -105,12 +109,24 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft); +static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr) +{ + memcpy(eui, addr, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + memcpy(eui + 5, addr + 3, 3); +} + +static inline void addrconf_addr_eui48(u8 *eui, const char *const addr) +{ + addrconf_addr_eui48_base(eui, addr); + eui[0] ^= 2; +} + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); /* * The zSeries OSA network cards can be shared among various @@ -125,14 +141,16 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) * case. Hence the resulting interface identifier has local * scope according to RFC2373. */ + + addrconf_addr_eui48_base(eui, dev->dev_addr); + if (dev->dev_id) { eui[3] = (dev->dev_id >> 8) & 0xFF; eui[4] = dev->dev_id & 0xFF; } else { - eui[3] = 0xFF; - eui[4] = 0xFE; eui[0] ^= 2; } + return 0; } @@ -264,8 +282,12 @@ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); -void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf); +int register_inet6addr_validator_notifier(struct notifier_block *nb); +int unregister_inet6addr_validator_notifier(struct notifier_block *nb); +int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); + +void inet6_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv6_devconf *devconf); /** * __in6_dev_get - get inet6_dev pointer from netdevice @@ -294,7 +316,7 @@ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } @@ -310,36 +332,36 @@ void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { - if (atomic_dec_and_test(&idev->refcnt)) + if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void __in6_dev_put(struct inet6_dev *idev) { - atomic_dec(&idev->refcnt); + refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { - if (atomic_dec_and_test(&ifp->refcnt)) + if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { - atomic_dec(&ifp->refcnt); + refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { - atomic_inc(&ifp->refcnt); + refcount_inc(&ifp->refcnt); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1061a472a3e3..c172709787af 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -33,18 +33,20 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, + s64, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); -void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, +bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); +void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd60eccb59a6..53b1a2cca421 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -4,6 +4,7 @@ #include <linux/socket.h> #include <linux/un.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <net/sock.h> void unix_inflight(struct user_struct *user, struct file *fp); @@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { - atomic_t refcnt; + refcount_t refcnt; int len; unsigned int hash; struct sockaddr_un name[0]; @@ -36,7 +37,7 @@ struct unix_skb_parms { u32 secid; /* Security ID */ #endif u32 consumed; -}; +} __randomize_layout; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) @@ -62,7 +63,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; - wait_queue_t peer_wake; + wait_queue_entry_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f..f9fb566e75cf 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); @@ -185,4 +188,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +/**** TAP ****/ + +struct vsock_tap { + struct net_device *dev; + struct module *module; + struct list_head list; +}; + +int vsock_init_tap(void); +int vsock_add_tap(struct vsock_tap *vt); +int vsock_remove_tap(struct vsock_tap *vt); +void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); + #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/arp.h b/include/net/arp.h index 5e0f891d476c..17d90e4e8dc5 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -28,13 +28,29 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); return n; } +static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + void arp_init(void); int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); void arp_send(int type, int ptype, __be32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index e602f8177ebf..c4a0cf6f0810 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -11,7 +11,7 @@ #include <linux/timer.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <net/neighbour.h> #include <net/sock.h> @@ -158,7 +158,7 @@ enum { typedef struct ax25_uid_assoc { struct hlist_node uid_node; - atomic_t refcount; + refcount_t refcount; kuid_t uid; ax25_address call; } ax25_uid_assoc; @@ -167,11 +167,11 @@ typedef struct ax25_uid_assoc { hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ - atomic_inc(&((ax25)->refcount)) + refcount_inc(&((ax25)->refcount)) static inline void ax25_uid_put(ax25_uid_assoc *assoc) { - if (atomic_dec_and_test(&assoc->refcount)) { + if (refcount_dec_and_test(&assoc->refcount)) { kfree(assoc); } } @@ -185,7 +185,7 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - atomic_t refcount; + refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; @@ -194,14 +194,14 @@ typedef struct ax25_route { static inline void ax25_hold_route(ax25_route *ax25_rt) { - atomic_inc(&ax25_rt->refcount); + refcount_inc(&ax25_rt->refcount); } void __ax25_put_route(ax25_route *ax25_rt); static inline void ax25_put_route(ax25_route *ax25_rt) { - if (atomic_dec_and_test(&ax25_rt->refcount)) + if (refcount_dec_and_test(&ax25_rt->refcount)) __ax25_put_route(ax25_rt); } @@ -244,7 +244,7 @@ typedef struct ax25_cb { unsigned char window; struct timer_list timer, dtimer; struct sock *sk; /* Backlink to socket */ - atomic_t refcount; + refcount_t refcount; } ax25_cb; struct ax25_sock { @@ -266,11 +266,11 @@ static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ - atomic_inc(&((__ax25)->refcount)) + refcount_inc(&((__ax25)->refcount)) static __inline__ void ax25_cb_put(ax25_cb *ax25) { - if (atomic_dec_and_test(&ax25->refcount)) { + if (refcount_dec_and_test(&ax25->refcount)) { kfree(ax25->digipeat); kfree(ax25); } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 99aa5e5e3100..fe98f0a5bef0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -399,6 +399,7 @@ enum { #define HCI_LE_PING 0x10 #define HCI_LE_DATA_LEN_EXT 0x20 #define HCI_LE_EXT_SCAN_POLICY 0x80 +#define HCI_LE_CHAN_SEL_ALG2 0x40 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1498,6 +1499,13 @@ struct hci_rp_le_read_max_data_len { __le16 rx_time; } __packed; +#define HCI_OP_LE_SET_DEFAULT_PHY 0x2031 +struct hci_cp_le_set_default_phy { + __u8 all_phys; + __u8 tx_phys; + __u8 rx_phys; +} __packed; + /* ---- HCI Events ---- */ #define HCI_EV_INQUIRY_COMPLETE 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 554671c81f4a..95ccc1eef558 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,6 +26,8 @@ #define __HCI_CORE_H #include <linux/leds.h> +#include <linux/rculist.h> + #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_sock.h> @@ -987,7 +989,7 @@ static inline void hci_conn_drop(struct hci_conn *conn) static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } @@ -995,7 +997,7 @@ static inline void hci_dev_put(struct hci_dev *d) static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, - atomic_read(&d->dev.kobj.kref.refcount)); + kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5ee3c689c863..0697fd413087 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -282,7 +282,7 @@ struct l2cap_conn_rsp { #define L2CAP_CR_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_ENCRYPTION 0x0008 #define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x0010 +#define L2CAP_CR_SCID_IN_USE 0x000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 4190af53a46a..da4acefe39c8 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/refcount.h> + #ifndef __RFCOMM_H #define __RFCOMM_H @@ -174,7 +176,7 @@ struct rfcomm_dlc { struct mutex lock; unsigned long state; unsigned long flags; - atomic_t refcnt; + refcount_t refcnt; u8 dlci; u8 addr; u8 priority; @@ -247,12 +249,12 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel); static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d) { - atomic_inc(&d->refcnt); + refcount_inc(&d->refcnt); } static inline void rfcomm_dlc_put(struct rfcomm_dlc *d) { - if (atomic_dec_and_test(&d->refcnt)) + if (refcount_dec_and_test(&d->refcnt)) rfcomm_dlc_free(d); } diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 1797235cd590..d79d28f5318c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -104,6 +104,8 @@ struct bond_option { int __bond_opt_set(struct bonding *bond, unsigned int option, struct bond_opt_value *val); +int __bond_opt_set_notify(struct bonding *bond, unsigned int option, + struct bond_opt_value *val); int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, diff --git a/include/net/bonding.h b/include/net/bonding.h index 3c857778a6ca..b00508d22e0a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -153,7 +153,8 @@ struct slave { unsigned long last_link_up; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; - s8 link; /* one of BOND_LINK_XXXX */ + s8 link; /* one of BOND_LINK_XXXX */ + s8 link_new_state; /* one of BOND_LINK_XXXX */ s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ @@ -165,7 +166,7 @@ struct slave { u32 link_failure_count; u32 speed; u16 queue_id; - u8 perm_hwaddr[ETH_ALEN]; + u8 perm_hwaddr[MAX_ADDR_LEN]; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER @@ -401,6 +402,16 @@ static inline bool bond_slave_can_tx(struct slave *slave) bond_is_active_slave(slave); } +static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len) +{ + if (len == ETH_ALEN) { + ether_addr_copy(dst, src); + return; + } + + memcpy(dst, src, len); +} + #define BOND_PRI_RESELECT_ALWAYS 0 #define BOND_PRI_RESELECT_BETTER 1 #define BOND_PRI_RESELECT_FAILURE 2 @@ -504,13 +515,17 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state, - bool notify) +static inline void bond_propose_link_state(struct slave *slave, int state) { - if (slave->link == state) + slave->link_new_state = state; +} + +static inline void bond_commit_link_state(struct slave *slave, bool notify) +{ + if (slave->link == slave->link_new_state) return; - slave->link = state; + slave->link = slave->link_new_state; if (notify) { bond_queue_slave_event(slave); bond_lower_state_changed(slave); @@ -523,6 +538,13 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state, } } +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) +{ + bond_propose_link_state(slave, state); + bond_commit_link_state(slave, notify); +} + static inline void bond_slave_link_notify(struct bonding *bond) { struct list_head *iter; @@ -592,6 +614,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); +void bond_work_init_all(struct bonding *bond); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index d73b849e29a6..8ffd434676b7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,8 @@ #define _LINUX_NET_BUSY_POLL_H #include <linux/netdevice.h> +#include <linux/sched/clock.h> +#include <linux/sched/signal.h> #include <net/ip.h> #ifdef CONFIG_NET_RX_BUSY_POLL @@ -33,87 +35,101 @@ struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; -/* return values from ndo_ll_poll */ -#define LL_FLUSH_FAILED -1 -#define LL_FLUSH_BUSY -2 +/* 0 - Reserved to indicate value not set + * 1..NR_CPUS - Reserved for sender_cpu + * NR_CPUS+1..~0 - Region available for NAPI IDs + */ +#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; } -static inline u64 busy_loop_us_clock(void) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return local_clock() >> 10; + return sk->sk_ll_usec && !signal_pending(current); } -static inline unsigned long sk_busy_loop_end_time(struct sock *sk) -{ - return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec); -} +bool sk_busy_loop_end(void *p, unsigned long start_time); -/* in poll/select we use the global sysctl_net_ll_poll value */ -static inline unsigned long busy_loop_end_time(void) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg); + +#else /* CONFIG_NET_RX_BUSY_POLL */ +static inline unsigned long net_busy_loop_on(void) { - return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); + return 0; } -static inline bool sk_can_busy_loop(const struct sock *sk) +static inline bool sk_can_busy_loop(struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); + return false; } +#endif /* CONFIG_NET_RX_BUSY_POLL */ -static inline bool busy_loop_timeout(unsigned long end_time) +static inline unsigned long busy_loop_current_time(void) { - unsigned long now = busy_loop_us_clock(); - - return time_after(now, end_time); +#ifdef CONFIG_NET_RX_BUSY_POLL + return (unsigned long)(local_clock() >> 10); +#else + return 0; +#endif } -bool sk_busy_loop(struct sock *sk, int nonblock); - -/* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) +/* in poll/select we use the global sysctl_net_ll_poll value */ +static inline bool busy_loop_timeout(unsigned long start_time) { - skb->napi_id = napi->napi_id; -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll); + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); -#else /* CONFIG_NET_RX_BUSY_POLL */ -static inline unsigned long net_busy_loop_on(void) -{ - return 0; + return time_after(now, end_time); + } +#endif + return true; } -static inline unsigned long busy_loop_end_time(void) +static inline bool sk_busy_loop_timeout(struct sock *sk, + unsigned long start_time) { - return 0; -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec); -static inline bool sk_can_busy_loop(struct sock *sk) -{ - return false; -} + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) -{ + return time_after(now, end_time); + } +#endif + return true; } -static inline bool busy_loop_timeout(unsigned long end_time) +static inline void sk_busy_loop(struct sock *sk, int nonblock) { - return true; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(sk->sk_napi_id); + + if (napi_id >= MIN_NAPI_ID) + napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); +#endif } -static inline bool sk_busy_loop(struct sock *sk, int nonblock) +/* used in the NIC receive handler to mark the skb */ +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { - return false; +#ifdef CONFIG_NET_RX_BUSY_POLL + skb->napi_id = napi->napi_id; +#endif } -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* used in the protocol hanlder to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { diff --git a/include/net/calipso.h b/include/net/calipso.h index b1b30cd36601..5f95b11a04bf 100644 --- a/include/net/calipso.h +++ b/include/net/calipso.h @@ -38,7 +38,7 @@ #include <linux/skbuff.h> #include <net/netlabel.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -57,7 +57,7 @@ struct calipso_doi { u32 doi; u32 type; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 814be4b4200c..f12fa5245a45 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,7 +5,7 @@ * * Copyright 2006-2010 Johannes Berg <[email protected]> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015-2016 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -311,6 +311,34 @@ struct ieee80211_supported_band { struct ieee80211_sta_vht_cap vht_cap; }; +/** + * wiphy_read_of_freq_limits - read frequency limits from device tree + * + * @wiphy: the wireless device to get extra limits for + * + * Some devices may have extra limitations specified in DT. This may be useful + * for chipsets that normally support more bands but are limited due to board + * design (e.g. by antennas or external power amplifier). + * + * This function reads info from DT and uses it to *modify* channels (disable + * unavailable ones). It's usually a *bad* idea to use it in drivers with + * shared channel data as DT limitations are device specific. You should make + * sure to call it only if channels in wiphy are copied and can be modified + * without affecting other devices. + * + * As this function access device node it has to be called after set_wiphy_dev. + * It also modifies channels so they have to be set first. + * If using this helper, call it before wiphy_register(). + */ +#ifdef CONFIG_OF +void wiphy_read_of_freq_limits(struct wiphy *wiphy); +#else /* CONFIG_OF */ +static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy) +{ +} +#endif /* !CONFIG_OF */ + + /* * Wireless hardware/device configuration structures and methods */ @@ -335,6 +363,8 @@ struct ieee80211_supported_band { /** * struct vif_params - describes virtual interface parameters + * @flags: monitor interface flags, unchanged if 0, otherwise + * %MONITOR_FLAG_CHANGED will be set * @use_4addr: use 4-address frames * @macaddr: address to use for this virtual interface. * If this parameter is set to zero address the driver may @@ -342,13 +372,17 @@ struct ieee80211_supported_band { * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. - * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only - * packets belonging to that MU-MIMO groupID. + * @vht_mumimo_groups: MU-MIMO groupID, used for monitoring MU-MIMO packets + * belonging to that MU-MIMO groupID; %NULL if not changed + * @vht_mumimo_follow_addr: MU-MIMO follow address, used for monitoring + * MU-MIMO packets going to the specified station; %NULL if not changed */ struct vif_params { + u32 flags; int use_4addr; u8 macaddr[ETH_ALEN]; - u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN]; + const u8 *vht_mumimo_groups; + const u8 *vht_mumimo_follow_addr; }; /** @@ -615,6 +649,7 @@ struct survey_info { * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key + * @psk: PSK (for devices supporting 4-way-handshake offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -628,6 +663,7 @@ struct cfg80211_crypto_settings { bool control_port_no_encrypt; struct key_params *wep_keys; int wep_tx_key; + const u8 *psk; }; /** @@ -720,6 +756,10 @@ struct cfg80211_bitrate_mask { * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. * @beacon_rate: bitrate to be used for beacons + * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) + * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) + * @ht_required: stations must support HT + * @vht_required: stations must support VHT */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -740,6 +780,10 @@ struct cfg80211_ap_settings { const struct cfg80211_acl_data *acl; bool pbss; struct cfg80211_bitrate_mask beacon_rate; + + const struct ieee80211_ht_cap *ht_cap; + const struct ieee80211_vht_cap *vht_cap; + bool ht_required, vht_required; }; /** @@ -971,9 +1015,9 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth */ enum rate_info_bw { + RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, - RATE_INFO_BW_20, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, @@ -1175,6 +1219,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * Monitor interface configuration flags. Note that these must be the bits * according to the nl80211 flags. * + * @MONITOR_FLAG_CHANGED: set if the flags were changed * @MONITOR_FLAG_FCSFAIL: pass frames with bad FCS * @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP * @MONITOR_FLAG_CONTROL: pass control frames @@ -1183,6 +1228,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { + MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, MONITOR_FLAG_FCSFAIL = 1<<NL80211_MNTR_FLAG_FCSFAIL, MONITOR_FLAG_PLCPFAIL = 1<<NL80211_MNTR_FLAG_PLCPFAIL, MONITOR_FLAG_CONTROL = 1<<NL80211_MNTR_FLAG_CONTROL, @@ -1397,6 +1443,9 @@ struct mesh_config { * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons + * @userspace_handles_dfs: whether user space controls DFS operation, i.e. + * changes the channel when a radar is detected. This is required + * to operate on DFS channels. * * These parameters are fixed when the mesh is created. */ @@ -1418,6 +1467,7 @@ struct mesh_setup { int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; + bool userspace_handles_dfs; }; /** @@ -1569,11 +1619,15 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) /** * struct cfg80211_match_set - sets of attributes to match * - * @ssid: SSID to be matched; may be zero-length for no match (RSSI only) + * @ssid: SSID to be matched; may be zero-length in case of BSSID match + * or no match (RSSI only) + * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match + * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) */ struct cfg80211_match_set { struct cfg80211_ssid ssid; + u8 bssid[ETH_ALEN]; s32 rssi_thold; }; @@ -1592,8 +1646,20 @@ struct cfg80211_sched_scan_plan { }; /** + * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. + * + * @band: band of BSS which should match for RSSI level adjustment. + * @delta: value of RSSI level adjustment. + */ +struct cfg80211_bss_select_adjust { + enum nl80211_band band; + s8 delta; +}; + +/** * struct cfg80211_sched_scan_request - scheduled scan request description * + * @reqid: identifies this request. * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan @@ -1606,6 +1672,7 @@ struct cfg80211_sched_scan_plan { * (others are filtered out). * If ommited, all results are passed. * @n_match_sets: number of match sets + * @report_results: indicates that results were reported for this request * @wiphy: the wiphy this was for * @dev: the interface * @scan_start: start time of the scheduled scan @@ -1622,12 +1689,25 @@ struct cfg80211_sched_scan_plan { * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this should is a request * owned by a particular socket) + * @nl_owner_dead: netlink owner socket was closed - this request be freed + * @list: for keeping list of requests. * @delay: delay in seconds to use before starting the first scan * cycle. The driver may ignore this parameter and start * immediately (or at any other time), if this feature is not * supported. + * @relative_rssi_set: Indicates whether @relative_rssi is set or not. + * @relative_rssi: Relative RSSI threshold in dB to restrict scan result + * reporting in connected state to cases where a matching BSS is determined + * to have better or slightly worse RSSI than the current connected BSS. + * The relative RSSI threshold values are ignored in disconnected state. + * @rssi_adjust: delta dB of RSSI preference to be given to the BSSs that belong + * to the specified band while deciding whether a better BSS is reported + * using @relative_rssi. If delta is a negative number, the BSSs that + * belong to the specified band will be penalized by delta dB in relative + * comparisions. */ struct cfg80211_sched_scan_request { + u64 reqid; struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; @@ -1645,12 +1725,19 @@ struct cfg80211_sched_scan_request { u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); + bool relative_rssi_set; + s8 relative_rssi; + struct cfg80211_bss_select_adjust rssi_adjust; + /* internal */ struct wiphy *wiphy; struct net_device *dev; unsigned long scan_start; + bool report_results; struct rcu_head rcu_head; u32 owner_nlportid; + bool nl_owner_dead; + struct list_head list; /* keep last */ struct ieee80211_channel *channels[0]; @@ -1887,7 +1974,7 @@ struct cfg80211_deauth_request { * struct cfg80211_disassoc_request - Disassociation request data * * This structure provides information needed to complete IEEE 802.11 - * disassocation. + * disassociation. * * @bss: the BSS to disassociate from * @ie: Extra IEs to add to Disassociation frame or %NULL @@ -1953,17 +2040,6 @@ struct cfg80211_ibss_params { }; /** - * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. - * - * @band: band of BSS which should match for RSSI level adjustment. - * @delta: value of RSSI level adjustment. - */ -struct cfg80211_bss_select_adjust { - enum nl80211_band band; - s8 delta; -}; - -/** * struct cfg80211_bss_selection - connection parameters for BSS selection. * * @behaviour: requested BSS selection behaviour. @@ -2023,6 +2099,21 @@ struct cfg80211_bss_selection { * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. + * @fils_erp_username: EAP re-authentication protocol (ERP) username part of the + * NAI or %NULL if not specified. This is used to construct FILS wrapped + * data IE. + * @fils_erp_username_len: Length of @fils_erp_username in octets. + * @fils_erp_realm: EAP re-authentication protocol (ERP) realm part of NAI or + * %NULL if not specified. This specifies the domain name of ER server and + * is used to construct FILS wrapped data IE. + * @fils_erp_realm_len: Length of @fils_erp_realm in octets. + * @fils_erp_next_seq_num: The next sequence number to use in the FILS ERP + * messages. This is also used to construct FILS wrapped data IE. + * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional + * keys in FILS or %NULL if not specified. + * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. + * @want_1x: indicates user-space supports and wants to use 802.1X driver + * offload of 4-way handshake. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -2048,6 +2139,14 @@ struct cfg80211_connect_params { bool pbss; struct cfg80211_bss_selection bss_select; const u8 *prev_bssid; + const u8 *fils_erp_username; + size_t fils_erp_username_len; + const u8 *fils_erp_realm; + size_t fils_erp_realm_len; + u16 fils_erp_next_seq_num; + const u8 *fils_erp_rrk; + size_t fils_erp_rrk_len; + bool want_1x; }; /** @@ -2086,12 +2185,27 @@ enum wiphy_params_flags { * This structure is passed to the set/del_pmksa() method for PMKSA * caching. * - * @bssid: The AP's BSSID. - * @pmkid: The PMK material itself. + * @bssid: The AP's BSSID (may be %NULL). + * @pmkid: The identifier to refer a PMKSA. + * @pmk: The PMK for the PMKSA identified by @pmkid. This is used for key + * derivation by a FILS STA. Otherwise, %NULL. + * @pmk_len: Length of the @pmk. The length of @pmk can differ depending on + * the hash algorithm used to generate this. + * @ssid: SSID to specify the ESS within which a PMKSA is valid when using FILS + * cache identifier (may be %NULL). + * @ssid_len: Length of the @ssid in octets. + * @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the + * scope of PMKSA. This is valid only if @ssid_len is non-zero (may be + * %NULL). */ struct cfg80211_pmksa { const u8 *bssid; const u8 *pmkid; + const u8 *pmk; + size_t pmk_len; + const u8 *ssid; + size_t ssid_len; + const u8 *cache_id; }; /** @@ -2366,11 +2480,13 @@ struct cfg80211_qos_map { * This struct defines NAN configuration parameters * * @master_pref: master preference (1 - 255) - * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf + * @bands: operating bands, a bitmap of &enum nl80211_band values. + * For instance, for NL80211_BAND_2GHZ, bit 0 would be set + * (i.e. BIT(NL80211_BAND_2GHZ)). */ struct cfg80211_nan_conf { u8 master_pref; - u8 dual; + u8 bands; }; /** @@ -2378,11 +2494,11 @@ struct cfg80211_nan_conf { * configuration * * @CFG80211_NAN_CONF_CHANGED_PREF: master preference - * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation + * @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands */ enum cfg80211_nan_conf_changes { CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), - CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1), + CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1), }; /** @@ -2453,6 +2569,23 @@ struct cfg80211_nan_func { }; /** + * struct cfg80211_pmk_conf - PMK configuration + * + * @aa: authenticator address + * @pmk_len: PMK length in bytes. + * @pmk: the PMK material + * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK + * is not PMK-R0). When pmk_r0_name is not NULL, the pmk field + * holds PMK-R0. + */ +struct cfg80211_pmk_conf { + const u8 *aa; + u8 pmk_len; + const u8 *pmk; + const u8 *pmk_r0_name; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -2581,8 +2714,7 @@ struct cfg80211_nan_func { * indication of requesting reassociation. * In both the driver-initiated and new connect() call initiated roaming * cases, the result of roaming is indicated with a call to - * cfg80211_roamed() or cfg80211_roamed_bss(). - * (invoked with the wireless_dev mutex held) + * cfg80211_roamed(). (invoked with the wireless_dev mutex held) * @update_connect_params: Update the connect parameters while connected to a * BSS. The updated parameters can be used by driver/firmware for * subsequent BSS selection (roaming) decisions and to form the @@ -2660,15 +2792,20 @@ struct cfg80211_nan_func { * the current level is above/below the configured threshold; this may * need some care when the configuration is changed (without first being * disabled.) + * @set_cqm_rssi_range_config: Configure two RSSI thresholds in the + * connection quality monitor. An event is to be sent only when the + * signal level is found to be outside the two values. The driver should + * set %NL80211_EXT_FEATURE_CQM_RSSI_LIST if this method is implemented. + * If it is provided then there's no point providing @set_cqm_rssi_config. * @set_cqm_txe_config: Configure connection quality monitor TX error * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. - * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan. This - * call must stop the scheduled scan and be ready for starting a new one - * before it returns, i.e. @sched_scan_start may be called immediately - * after that again and should not fail in that case. The driver should - * not call cfg80211_sched_scan_stopped() for a requested stop (when this - * method returns 0.) + * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan with + * given request id. This call must stop the scheduled scan and be ready + * for starting a new one before it returns, i.e. @sched_scan_start may be + * called immediately after that again and should not fail in that case. + * The driver should not call cfg80211_sched_scan_stopped() for a requested + * stop (when this method returns 0). * * @mgmt_frame_register: Notify driver that a management frame type was * registered. The callback is allowed to sleep. @@ -2764,6 +2901,13 @@ struct cfg80211_nan_func { * All other parameters must be ignored. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS + * + * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. + * If not deleted through @del_pmk the PMK remains valid until disconnect + * upon which the driver should clear it. + * (invoked with the wireless_dev mutex held) + * @del_pmk: delete the previously configured PMK for the given authenticator. + * (invoked with the wireless_dev mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2774,13 +2918,12 @@ struct cfg80211_ops { const char *name, unsigned char name_assign_type, enum nl80211_iftype type, - u32 *flags, struct vif_params *params); int (*del_virtual_intf)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, - enum nl80211_iftype type, u32 *flags, + enum nl80211_iftype type, struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, @@ -2949,6 +3092,10 @@ struct cfg80211_ops { struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + int (*set_cqm_rssi_range_config)(struct wiphy *wiphy, + struct net_device *dev, + s32 rssi_low, s32 rssi_high); + int (*set_cqm_txe_config)(struct wiphy *wiphy, struct net_device *dev, u32 rate, u32 pkts, u32 intvl); @@ -2963,7 +3110,8 @@ struct cfg80211_ops { int (*sched_scan_start)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *request); - int (*sched_scan_stop)(struct wiphy *wiphy, struct net_device *dev); + int (*sched_scan_stop)(struct wiphy *wiphy, struct net_device *dev, + u64 reqid); int (*set_rekey_data)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data); @@ -3048,6 +3196,11 @@ struct cfg80211_ops { int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); + + int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_pmk_conf *conf); + int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, + const u8 *aa); }; /* @@ -3108,7 +3261,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), - WIPHY_FLAG_SUPPORTS_SCHED_SCAN = BIT(11), + /* use hole at 11 */ /* use hole at 12 */ WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), @@ -3136,22 +3289,6 @@ struct ieee80211_iface_limit { /** * struct ieee80211_iface_combination - possible interface combination - * @limits: limits for the given interface types - * @n_limits: number of limitations - * @num_different_channels: can use up to this many different channels - * @max_interfaces: maximum number of interfaces in total allowed in this - * group - * @beacon_int_infra_match: In this combination, the beacon intervals - * between infrastructure and AP types must match. This is required - * only in special cases. - * @radar_detect_widths: bitmap of channel widths supported for radar detection - * @radar_detect_regions: bitmap of regions supported for radar detection - * @beacon_int_min_gcd: This interface combination supports different - * beacon intervals. - * = 0 - all beacon intervals for different interface must be same. - * > 0 - any beacon interval for the interface part of this combination AND - * *GCD* of all beacon intervals from beaconing interfaces of this - * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -3210,13 +3347,60 @@ struct ieee80211_iface_limit { * */ struct ieee80211_iface_combination { + /** + * @limits: + * limits for the given interface types + */ const struct ieee80211_iface_limit *limits; + + /** + * @num_different_channels: + * can use up to this many different channels + */ u32 num_different_channels; + + /** + * @max_interfaces: + * maximum number of interfaces in total allowed in this group + */ u16 max_interfaces; + + /** + * @n_limits: + * number of limitations + */ u8 n_limits; + + /** + * @beacon_int_infra_match: + * In this combination, the beacon intervals between infrastructure + * and AP types must match. This is required only in special cases. + */ bool beacon_int_infra_match; + + /** + * @radar_detect_widths: + * bitmap of channel widths supported for radar detection + */ u8 radar_detect_widths; + + /** + * @radar_detect_regions: + * bitmap of regions supported for radar detection + */ u8 radar_detect_regions; + + /** + * @beacon_int_min_gcd: + * This interface combination supports different beacon intervals. + * + * = 0 + * all beacon intervals for different interface must be same. + * > 0 + * any beacon interval for the interface part of this combination AND + * GCD of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. + */ u32 beacon_int_min_gcd; }; @@ -3415,6 +3599,8 @@ struct wiphy_iftype_ext_capab { * this variable determines its size * @max_scan_ssids: maximum number of SSIDs the device can scan for in * any given scan + * @max_sched_scan_reqs: maximum number of scheduled scan requests that + * the device can run concurrently. * @max_sched_scan_ssids: maximum number of SSIDs the device can scan * for in any given scheduled scan * @max_match_sets: maximum number of match sets the device can handle @@ -3515,6 +3701,10 @@ struct wiphy_iftype_ext_capab { * attribute indices defined in &enum nl80211_bss_select_attr. * * @cookie_counter: unique generic cookie counter, used to identify objects. + * @nan_supported_bands: bands supported by the device in NAN mode, a + * bitmap of &enum nl80211_band values. For instance, for + * NL80211_BAND_2GHZ, bit 0 would be set + * (i.e. BIT(NL80211_BAND_2GHZ)). */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3547,6 +3737,7 @@ struct wiphy { int bss_priv_size; u8 max_scan_ssids; + u8 max_sched_scan_reqs; u8 max_sched_scan_ssids; u8 max_match_sets; u16 max_scan_ie_len; @@ -3646,6 +3837,8 @@ struct wiphy { u64 cookie_counter; + u8 nan_supported_bands; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -3782,6 +3975,7 @@ void wiphy_free(struct wiphy *wiphy); struct cfg80211_conn; struct cfg80211_internal_bss; struct cfg80211_cached_keys; +struct cfg80211_cqm_config; /** * struct wireless_dev - wireless device state @@ -3837,11 +4031,16 @@ struct cfg80211_cached_keys; * @conn: (private) cfg80211 software SME connection state machine data * @connect_keys: (private) keys to set after connection is established * @conn_bss_type: connecting/connected BSS type + * @conn_owner_nlportid: (private) connection owner socket port ID + * @disconnect_wk: (private) auto-disconnect work + * @disconnect_bssid: (private) the BSSID to use for auto-disconnect * @ibss_fixed: (private) IBSS is using fixed BSSID * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID + * @nl_owner_dead: (private) owner socket went away + * @cqm_config: (private) nl80211 RSSI monitor state */ struct wireless_dev { struct wiphy *wiphy; @@ -3868,6 +4067,10 @@ struct wireless_dev { struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; + u32 conn_owner_nlportid; + + struct work_struct disconnect_wk; + u8 disconnect_bssid[ETH_ALEN]; struct list_head event_list; spinlock_t event_lock; @@ -3886,12 +4089,13 @@ struct wireless_dev { u32 ap_unexpected_nlportid; + u32 owner_nlportid; + bool nl_owner_dead; + bool cac_started; unsigned long cac_start_time; unsigned int cac_time_ms; - u32 owner_nlportid; - #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -3906,6 +4110,8 @@ struct wireless_dev { bool prev_bssid_valid; } wext; #endif + + struct cfg80211_cqm_config *cqm_config; }; static inline u8 *wdev_address(struct wireless_dev *wdev) @@ -3955,26 +4161,15 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band); */ int ieee80211_frequency_to_channel(int freq); -/* - * Name indirection necessary because the ieee80211 code also has - * a function named "ieee80211_get_channel", so if you include - * cfg80211's header file you get cfg80211's version, if you try - * to include both header files you'll (rightfully!) get a symbol - * clash. - */ -struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, - int freq); /** * ieee80211_get_channel - get channel struct from wiphy for specified frequency + * * @wiphy: the struct wiphy to get the channel for * @freq: the center frequency of the channel + * * Return: The channel struct from @wiphy at @freq. */ -static inline struct ieee80211_channel * -ieee80211_get_channel(struct wiphy *wiphy, int freq) -{ - return __ieee80211_get_channel(wiphy, freq); -} +struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq); /** * ieee80211_get_response_rate - get basic rate for a given rate @@ -4409,31 +4604,34 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, * cfg80211_sched_scan_results - notify that new scan results are available * * @wiphy: the wiphy which got scheduled scan results + * @reqid: identifier for the related scheduled scan request */ -void cfg80211_sched_scan_results(struct wiphy *wiphy); +void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid); /** * cfg80211_sched_scan_stopped - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped + * @reqid: identifier for the related scheduled scan request * * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. */ -void cfg80211_sched_scan_stopped(struct wiphy *wiphy); +void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); /** * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped + * @reqid: identifier for the related scheduled scan request * * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. * This function should be called with rtnl locked. */ -void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy); +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid); /** * cfg80211_inform_bss_frame_data - inform cfg80211 of a received BSS frame @@ -4566,12 +4764,22 @@ cfg80211_inform_bss(struct wiphy *wiphy, gfp); } +/** + * cfg80211_get_bss - get a BSS reference + * @wiphy: the wiphy this BSS struct belongs to + * @channel: the channel to search on (or %NULL) + * @bssid: the desired BSSID (or %NULL) + * @ssid: the desired SSID (or %NULL) + * @ssid_len: length of the SSID (or 0) + * @bss_type: type of BSS, see &enum ieee80211_bss_type + * @privacy: privacy filter, see &enum ieee80211_privacy + */ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, - enum ieee80211_privacy); + enum ieee80211_privacy privacy); static inline struct cfg80211_bss * cfg80211_get_ibss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -5038,6 +5246,78 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) #endif /** + * struct cfg80211_connect_resp_params - Connection response params + * @status: Status code, %WLAN_STATUS_SUCCESS for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. If this call is used to report a + * failure due to a timeout (e.g., not receiving an Authentication frame + * from the AP) instead of an explicit rejection by the AP, -1 is used to + * indicate that this is a failure, but without a status code. + * @timeout_reason is used to report the reason for the timeout in that + * case. + * @bssid: The BSSID of the AP (may be %NULL) + * @bss: Entry of bss to which STA got connected to, can be obtained through + * cfg80211_get_bss() (may be %NULL). Only one parameter among @bssid and + * @bss needs to be specified. + * @req_ie: Association request IEs (may be %NULL) + * @req_ie_len: Association request IEs length + * @resp_ie: Association response IEs (may be %NULL) + * @resp_ie_len: Association response IEs length + * @fils_kek: KEK derived from a successful FILS connection (may be %NULL) + * @fils_kek_len: Length of @fils_kek in octets + * @update_erp_next_seq_num: Boolean value to specify whether the value in + * @fils_erp_next_seq_num is valid. + * @fils_erp_next_seq_num: The next sequence number to use in ERP message in + * FILS Authentication. This value should be specified irrespective of the + * status for a FILS connection. + * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL). + * @pmk_len: Length of @pmk in octets + * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID + * used for this FILS connection (may be %NULL). + * @timeout_reason: Reason for connection timeout. This is used when the + * connection fails due to a timeout instead of an explicit rejection from + * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is + * not known. This value is used only if @status < 0 to indicate that the + * failure is due to a timeout and not due to explicit rejection by the AP. + * This value is ignored in other cases (@status >= 0). + */ +struct cfg80211_connect_resp_params { + int status; + const u8 *bssid; + struct cfg80211_bss *bss; + const u8 *req_ie; + size_t req_ie_len; + const u8 *resp_ie; + size_t resp_ie_len; + const u8 *fils_kek; + size_t fils_kek_len; + bool update_erp_next_seq_num; + u16 fils_erp_next_seq_num; + const u8 *pmk; + size_t pmk_len; + const u8 *pmkid; + enum nl80211_timeout_reason timeout_reason; +}; + +/** + * cfg80211_connect_done - notify cfg80211 of connection result + * + * @dev: network device + * @params: connection response parameters + * @gfp: allocation flags + * + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_bss(), but takes a structure pointer for connection response + * parameters. Only one of the functions among cfg80211_connect_bss(), + * cfg80211_connect_result(), cfg80211_connect_timeout(), + * and cfg80211_connect_done() should be called. + */ +void cfg80211_connect_done(struct net_device *dev, + struct cfg80211_connect_resp_params *params, + gfp_t gfp); + +/** * cfg80211_connect_bss - notify cfg80211 of connection result * * @dev: network device @@ -5048,20 +5328,50 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @status: status code, 0 for successful connection, use - * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you - * the real status code for failures. + * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. If this call is used to report a + * failure due to a timeout (e.g., not receiving an Authentication frame + * from the AP) instead of an explicit rejection by the AP, -1 is used to + * indicate that this is a failure, but without a status code. + * @timeout_reason is used to report the reason for the timeout in that + * case. * @gfp: allocation flags - * - * It should be called by the underlying driver whenever connect() has - * succeeded. This is similar to cfg80211_connect_result(), but with the - * option of identifying the exact bss entry for the connection. Only one of - * these functions should be called. + * @timeout_reason: reason for connection timeout. This is used when the + * connection fails due to a timeout instead of an explicit rejection from + * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is + * not known. This value is used only if @status < 0 to indicate that the + * failure is due to a timeout and not due to explicit rejection by the AP. + * This value is ignored in other cases (@status >= 0). + * + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_result(), but with the option of identifying the exact bss + * entry for the connection. Only one of the functions among + * cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ -void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, - struct cfg80211_bss *bss, const u8 *req_ie, - size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, int status, gfp_t gfp); +static inline void +cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, + size_t req_ie_len, const u8 *resp_ie, + size_t resp_ie_len, int status, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason) +{ + struct cfg80211_connect_resp_params params; + + memset(¶ms, 0, sizeof(params)); + params.status = status; + params.bssid = bssid; + params.bss = bss; + params.req_ie = req_ie; + params.req_ie_len = req_ie_len; + params.resp_ie = resp_ie; + params.resp_ie_len = resp_ie_len; + params.timeout_reason = timeout_reason; + + cfg80211_connect_done(dev, ¶ms, gfp); +} /** * cfg80211_connect_result - notify cfg80211 of connection result @@ -5072,13 +5382,16 @@ void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @status: status code, 0 for successful connection, use + * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. * @gfp: allocation flags * - * It should be called by the underlying driver whenever connect() has - * succeeded. + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_bss() which allows the exact bss entry to be specified. Only + * one of the functions among cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, @@ -5087,7 +5400,8 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, u16 status, gfp_t gfp) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, resp_ie, - resp_ie_len, status, gfp); + resp_ie_len, status, gfp, + NL80211_TIMEOUT_UNSPECIFIED); } /** @@ -5098,67 +5412,70 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @gfp: allocation flags + * @timeout_reason: reason for connection timeout. * * It should be called by the underlying driver whenever connect() has failed * in a sequence where no explicit authentication/association rejection was * received from the AP. This could happen, e.g., due to not being able to send * out the Authentication or Association Request frame or timing out while - * waiting for the response. + * waiting for the response. Only one of the functions among + * cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, gfp_t gfp) + const u8 *req_ie, size_t req_ie_len, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, - gfp); + gfp, timeout_reason); } /** - * cfg80211_roamed - notify cfg80211 of roaming + * struct cfg80211_roam_info - driver initiated roaming information * - * @dev: network device * @channel: the channel of the new AP - * @bssid: the BSSID of the new AP + * @bss: entry of bss to which STA got roamed (may be %NULL if %bssid is set) + * @bssid: the BSSID of the new AP (may be %NULL if %bss is set) * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @gfp: allocation flags - * - * It should be called by the underlying driver whenever it roamed - * from one AP to another while connected. + * @authorized: true if the 802.1X authentication was done by the driver or is + * not needed (e.g., when Fast Transition protocol was used), false + * otherwise. Ignored for networks that don't use 802.1X authentication. */ -void cfg80211_roamed(struct net_device *dev, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +struct cfg80211_roam_info { + struct ieee80211_channel *channel; + struct cfg80211_bss *bss; + const u8 *bssid; + const u8 *req_ie; + size_t req_ie_len; + const u8 *resp_ie; + size_t resp_ie_len; + bool authorized; +}; /** - * cfg80211_roamed_bss - notify cfg80211 of roaming + * cfg80211_roamed - notify cfg80211 of roaming * * @dev: network device - * @bss: entry of bss to which STA got roamed - * @req_ie: association request IEs (maybe be %NULL) - * @req_ie_len: association request IEs length - * @resp_ie: association response IEs (may be %NULL) - * @resp_ie_len: assoc response IEs length + * @info: information about the new BSS. struct &cfg80211_roam_info. * @gfp: allocation flags * - * This is just a wrapper to notify cfg80211 of roaming event with driver - * passing bss to avoid a race in timeout of the bss entry. It should be - * called by the underlying driver whenever it roamed from one AP to another - * while connected. Drivers which have roaming implemented in firmware - * may use this function to avoid a race in bss entry timeout where the bss - * entry of the new AP is seen in the driver, but gets timed out by the time - * it is accessed in __cfg80211_roamed() due to delay in scheduling + * This function may be called with the driver passing either the BSSID of the + * new AP or passing the bss entry to avoid a race in timeout of the bss entry. + * It should be called by the underlying driver whenever it roamed from one AP + * to another while connected. Drivers which have roaming implemented in + * firmware should pass the bss entry to avoid a race in bss entry timeout where + * the bss entry of the new AP is seen in the driver, but gets timed out by the + * time it is accessed in __cfg80211_roamed() due to delay in scheduling * rdev->event_work. In case of any failures, the reference is released - * either in cfg80211_roamed_bss() or in __cfg80211_romed(), Otherwise, - * it will be released while diconneting from the current bss. + * either in cfg80211_roamed() or in __cfg80211_romed(), Otherwise, it will be + * released while diconneting from the current bss. */ -void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, + gfp_t gfp); /** * cfg80211_disconnected - notify cfg80211 that connection was dropped @@ -5296,6 +5613,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event * @dev: network device * @rssi_event: the triggered RSSI event + * @rssi_level: new RSSI level value or 0 if not available * @gfp: context flags * * This function is called when a configured connection quality monitoring @@ -5303,7 +5621,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, */ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp); + s32 rssi_level, gfp_t gfp); /** * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer diff --git a/include/net/checksum.h b/include/net/checksum.h index 35d0fabd2782..aef2b2bb6603 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -179,7 +179,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { - *psum = csum_fold(csum_sub(delta, *psum)); + *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } #endif diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a34b141f125f..880adb2f2afd 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -41,6 +41,7 @@ #include <net/netlabel.h> #include <net/request_sock.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -85,7 +86,7 @@ struct cipso_v4_doi { } map; u8 tags[CIPSO_V4_TAG_MAXCNT]; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/devlink.h b/include/net/devlink.h index d29e5fc82582..ed7687bbf5d0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -25,6 +25,8 @@ struct devlink { struct list_head list; struct list_head port_list; struct list_head sb_list; + struct list_head dpipe_table_list; + struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; @@ -49,6 +51,178 @@ struct devlink_sb_pool_info { enum devlink_sb_threshold_type threshold_type; }; +/** + * struct devlink_dpipe_field - dpipe field object + * @name: field name + * @id: index inside the headers field array + * @bitwidth: bitwidth + * @mapping_type: mapping type + */ +struct devlink_dpipe_field { + const char *name; + unsigned int id; + unsigned int bitwidth; + enum devlink_dpipe_field_mapping_type mapping_type; +}; + +/** + * struct devlink_dpipe_header - dpipe header object + * @name: header name + * @id: index, global/local detrmined by global bit + * @fields: fields + * @fields_count: number of fields + * @global: indicates if header is shared like most protocol header + * or driver specific + */ +struct devlink_dpipe_header { + const char *name; + unsigned int id; + struct devlink_dpipe_field *fields; + unsigned int fields_count; + bool global; +}; + +/** + * struct devlink_dpipe_match - represents match operation + * @type: type of match + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @fieled_id: field index + */ +struct devlink_dpipe_match { + enum devlink_dpipe_match_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_action - represents action operation + * @type: type of action + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @fieled_id: field index + */ +struct devlink_dpipe_action { + enum devlink_dpipe_action_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_value - represents value of match/action + * @action: action + * @match: match + * @mapping_value: in case the field has some mapping this value + * specified the mapping value + * @mapping_valid: specify if mapping value is valid + * @value_size: value size + * @value: value + * @mask: bit mask + */ +struct devlink_dpipe_value { + union { + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + }; + unsigned int mapping_value; + bool mapping_valid; + unsigned int value_size; + void *value; + void *mask; +}; + +/** + * struct devlink_dpipe_entry - table entry object + * @index: index of the entry in the table + * @match_values: match values + * @matche_values_count: count of matches tuples + * @action_values: actions values + * @action_values_count: count of actions values + * @counter: value of counter + * @counter_valid: Specify if value is valid from hardware + */ +struct devlink_dpipe_entry { + u64 index; + struct devlink_dpipe_value *match_values; + unsigned int match_values_count; + struct devlink_dpipe_value *action_values; + unsigned int action_values_count; + u64 counter; + bool counter_valid; +}; + +/** + * struct devlink_dpipe_dump_ctx - context provided to driver in order + * to dump + * @info: info + * @cmd: devlink command + * @skb: skb + * @nest: top attribute + * @hdr: hdr + */ +struct devlink_dpipe_dump_ctx { + struct genl_info *info; + enum devlink_command cmd; + struct sk_buff *skb; + struct nlattr *nest; + void *hdr; +}; + +struct devlink_dpipe_table_ops; + +/** + * struct devlink_dpipe_table - table object + * @priv: private + * @name: table name + * @size: maximum number of entries + * @counters_enabled: indicates if counters are active + * @counter_control_extern: indicates if counter control is in dpipe or + * external tool + * @table_ops: table operations + * @rcu: rcu + */ +struct devlink_dpipe_table { + void *priv; + struct list_head list; + const char *name; + u64 size; + bool counters_enabled; + bool counter_control_extern; + struct devlink_dpipe_table_ops *table_ops; + struct rcu_head rcu; +}; + +/** + * struct devlink_dpipe_table_ops - dpipe_table ops + * @actions_dump - dumps all tables actions + * @matches_dump - dumps all tables matches + * @entries_dump - dumps all active entries in the table + * @counters_set_update - when changing the counter status hardware sync + * maybe needed to allocate/free counter related + * resources + */ +struct devlink_dpipe_table_ops { + int (*actions_dump)(void *priv, struct sk_buff *skb); + int (*matches_dump)(void *priv, struct sk_buff *skb); + int (*entries_dump)(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx); + int (*counters_set_update)(void *priv, bool enable); +}; + +/** + * struct devlink_dpipe_headers - dpipe headers + * @headers - header array can be shared (global bit) or driver specific + * @headers_count - count of headers + */ +struct devlink_dpipe_headers { + struct devlink_dpipe_header **headers; + unsigned int headers_count; +}; + struct devlink_ops { int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); @@ -94,6 +268,8 @@ struct devlink_ops { int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); }; static inline void *devlink_priv(struct devlink *devlink) @@ -132,6 +308,26 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern); +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers); +void devlink_dpipe_headers_unregister(struct devlink *devlink); +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry); +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action); +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match); #else @@ -200,6 +396,71 @@ static inline void devlink_sb_unregister(struct devlink *devlink, { } +static inline int +devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + return 0; +} + +static inline void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ +} + +static inline int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers * + dpipe_headers) +{ + return 0; +} + +static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ +} + +static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + return false; +} + +static inline int +devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + return 0; +} + +static inline int +devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index f2ca135ddcc9..81210a8b8d7c 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -2,6 +2,7 @@ #define _NET_DN_FIB_H #include <linux/netlink.h> +#include <linux/refcount.h> extern const struct nla_policy rtm_dn_policy[]; @@ -28,7 +29,7 @@ struct dn_fib_info { struct dn_fib_info *fib_next; struct dn_fib_info *fib_prev; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; int fib_dead; unsigned int fib_flags; int fib_protocol; @@ -130,7 +131,7 @@ void dn_fib_free_info(struct dn_fib_info *fi); static inline void dn_fib_info_put(struct dn_fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) dn_fib_free_info(fi); } diff --git a/include/net/dsa.h b/include/net/dsa.h index b122196d5a1f..58969b9a090c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -11,22 +11,31 @@ #ifndef __LINUX_NET_DSA_H #define __LINUX_NET_DSA_H +#include <linux/if.h> #include <linux/if_ether.h> #include <linux/list.h> +#include <linux/notifier.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> -#include <linux/phy.h> -#include <linux/phy_fixed.h> #include <linux/ethtool.h> +#include <net/devlink.h> +#include <net/switchdev.h> + +struct tc_action; +struct phy_device; +struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, + DSA_TAG_PROTO_BRCM, DSA_TAG_PROTO_DSA, - DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_KSZ, + DSA_TAG_PROTO_LAN9303, + DSA_TAG_PROTO_MTK, DSA_TAG_PROTO_QCA, + DSA_TAG_PROTO_TRAILER, DSA_TAG_LAST, /* MUST BE LAST */ }; @@ -42,6 +51,11 @@ struct dsa_chip_data { struct device *host_dev; int sw_addr; + /* + * Reference to network devices + */ + struct device *netdev[DSA_MAX_PORTS]; + /* set to size of eeprom if supported by the switch */ int eeprom_len; @@ -90,6 +104,9 @@ struct packet_type; struct dsa_switch_tree { struct list_head list; + /* Notifier chain for switch-wide events */ + struct raw_notifier_head nh; + /* Tree identifier */ u32 tree; @@ -105,27 +122,16 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* - * Reference to network device to use, and which tagging - * protocol to use. - */ - struct net_device *master_netdev; - int (*rcv)(struct sk_buff *skb, + /* Copy of tag_ops->rcv for faster access in hot path */ + struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /* - * Original copy of the master netdev ethtool_ops - */ - struct ethtool_ops master_ethtool_ops; - const struct ethtool_ops *master_orig_ethtool_ops; - - /* - * The switch and port to which the CPU is attached. + * The switch port to which the CPU is attached. */ - s8 cpu_switch; - s8 cpu_port; + struct dsa_port *cpu_dp; /* * Data for the individual switch chips. @@ -139,11 +145,44 @@ struct dsa_switch_tree { const struct dsa_device_ops *tag_ops; }; +/* TC matchall action types, only mirroring for now */ +enum dsa_port_mall_action_type { + DSA_PORT_MALL_MIRROR, +}; + +/* TC mirroring entry */ +struct dsa_mall_mirror_tc_entry { + u8 to_local_port; + bool ingress; +}; + +/* TC matchall entry */ +struct dsa_mall_tc_entry { + struct list_head list; + unsigned long cookie; + enum dsa_port_mall_action_type type; + union { + struct dsa_mall_mirror_tc_entry mirror; + }; +}; + + struct dsa_port { + struct dsa_switch *ds; + unsigned int index; + const char *name; + struct dsa_port *cpu_dp; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; u8 stp_state; + struct net_device *bridge_dev; + struct devlink_port devlink_port; + /* + * Original copy of the master netdev ethtool_ops + */ + struct ethtool_ops ethtool_ops; + const struct ethtool_ops *orig_ethtool_ops; }; struct dsa_switch { @@ -155,6 +194,9 @@ struct dsa_switch { struct dsa_switch_tree *dst; int index; + /* Listener for switch fabric events */ + struct notifier_block nb; + /* * Give the switch driver somewhere to hang its private data * structure. @@ -169,7 +211,7 @@ struct dsa_switch { /* * The switch operations. */ - struct dsa_switch_ops *ops; + const struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -178,19 +220,6 @@ struct dsa_switch { */ s8 rtable[DSA_MAX_SWITCHES]; -#ifdef CONFIG_NET_DSA_HWMON - /* - * Hardware monitoring information - */ - char hwmon_name[IFNAMSIZ + 8]; - struct device *hwmon_dev; -#endif - - /* - * The lower device this switch uses to talk to the host - */ - struct net_device *master_netdev; - /* * Slave mii_bus and devices for the individual ports. */ @@ -198,13 +227,23 @@ struct dsa_switch { u32 cpu_port_mask; u32 enabled_port_mask; u32 phys_mii_mask; - struct dsa_port ports[DSA_MAX_PORTS]; struct mii_bus *slave_mii_bus; + + /* Ageing Time limits in msecs */ + unsigned int ageing_time_min; + unsigned int ageing_time_max; + + /* devlink used to represent this switch device */ + struct devlink *devlink; + + /* Dynamically allocated ports, keep last */ + size_t num_ports; + struct dsa_port ports[]; }; static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); + return !!(ds->cpu_port_mask & (1 << p)); } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) @@ -212,6 +251,11 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) return !!((ds->dsa_port_mask) & (1 << p)); } +static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) +{ + return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); +} + static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; @@ -227,23 +271,15 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) * Else return the (DSA) port number that connects to the * switch that is one hop closer to the cpu. */ - if (dst->cpu_switch == ds->index) - return dst->cpu_port; + if (dst->cpu_dp->ds == ds) + return dst->cpu_dp->index; else - return ds->rtable[dst->cpu_switch]; + return ds->rtable[dst->cpu_dp->ds->index]; } -struct switchdev_trans; -struct switchdev_obj; -struct switchdev_obj_port_fdb; -struct switchdev_obj_port_mdb; -struct switchdev_obj_port_vlan; - struct dsa_switch_ops { - struct list_head list; - /* - * Probing and setup. + * Legacy probing. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, @@ -309,14 +345,6 @@ struct dsa_switch_ops { int (*get_eee)(struct dsa_switch *ds, int port, struct ethtool_eee *e); -#ifdef CONFIG_NET_DSA_HWMON - /* Hardware monitoring */ - int (*get_temp)(struct dsa_switch *ds, int *temp); - int (*get_temp_limit)(struct dsa_switch *ds, int *temp); - int (*set_temp_limit)(struct dsa_switch *ds, int temp); - int (*get_temp_alarm)(struct dsa_switch *ds, bool *alarm); -#endif - /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); int (*get_eeprom)(struct dsa_switch *ds, @@ -337,7 +365,8 @@ struct dsa_switch_ops { int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); - void (*port_bridge_leave)(struct dsa_switch *ds, int port); + void (*port_bridge_leave)(struct dsa_switch *ds, int port, + struct net_device *bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -357,7 +386,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Forwarding database @@ -372,7 +401,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_fdb *fdb); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Multicast database @@ -387,20 +416,58 @@ struct dsa_switch_ops { const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); + + /* + * RXNFC + */ + int (*get_rxnfc)(struct dsa_switch *ds, int port, + struct ethtool_rxnfc *nfc, u32 *rule_locs); + int (*set_rxnfc)(struct dsa_switch *ds, int port, + struct ethtool_rxnfc *nfc); + + /* + * TC integration + */ + int (*port_mirror_add)(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress); + void (*port_mirror_del)(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror); + + /* + * Cross-chip operations + */ + int (*crosschip_bridge_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *br); + void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *br); }; -void register_switch_driver(struct dsa_switch_ops *type); -void unregister_switch_driver(struct dsa_switch_ops *type); +struct dsa_switch_driver { + struct list_head list; + const struct dsa_switch_ops *ops; +}; + +/* Legacy driver registration */ +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); -static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) +struct net_device *dsa_dev_to_net_device(struct device *dev); + +/* Keep inline for faster access in hot path */ +static inline bool netdev_uses_dsa(struct net_device *dev) { - return dst->rcv != NULL; +#if IS_ENABLED(CONFIG_NET_DSA) + return dev->dsa_ptr && dev->dsa_ptr->rcv; +#endif + return false; } +struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); -int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); +int dsa_register_switch(struct dsa_switch *ds); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); diff --git a/include/net/dst.h b/include/net/dst.h index 6835d224d47b..f73611ec4017 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -31,9 +31,9 @@ struct sk_buff; struct dst_entry { + struct net_device *dev; struct rcu_head rcu_head; struct dst_entry *child; - struct net_device *dev; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -51,15 +51,11 @@ struct dst_entry { #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 -#define DST_NOHASH 0x0008 -#define DST_NOCACHE 0x0010 -#define DST_NOCOUNT 0x0020 -#define DST_FAKE_RTABLE 0x0040 -#define DST_XFRM_TUNNEL 0x0080 -#define DST_XFRM_QUEUE 0x0100 -#define DST_METADATA 0x0200 - - unsigned short pending_confirm; +#define DST_NOCOUNT 0x0008 +#define DST_FAKE_RTABLE 0x0010 +#define DST_XFRM_TUNNEL 0x0020 +#define DST_XFRM_QUEUE 0x0040 +#define DST_METADATA 0x0080 short error; @@ -78,6 +74,8 @@ struct dst_entry { #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ + unsigned short __pad3; + #ifdef CONFIG_IP_ROUTE_CLASSID __u32 tclassid; #else @@ -107,10 +105,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) @@ -247,7 +251,7 @@ static inline void dst_hold(struct dst_entry *dst) * __pad_to_align_refcnt declaration in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); - atomic_inc(&dst->__refcnt); + WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } static inline void dst_use(struct dst_entry *dst, unsigned long time) @@ -272,6 +276,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst) void dst_release(struct dst_entry *dst); +void dst_release_immediate(struct dst_entry *dst); + static inline void refdst_drop(unsigned long refdst) { if (!(refdst & SKB_DST_NOREF)) @@ -328,10 +334,7 @@ static inline void skb_dst_force(struct sk_buff *skb) */ static inline bool dst_hold_safe(struct dst_entry *dst) { - if (dst->flags & DST_NOCACHE) - return atomic_inc_not_zero(&dst->__refcnt); - dst_hold(dst); - return true; + return atomic_inc_not_zero(&dst->__refcnt); } /** @@ -417,51 +420,11 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); -void __dst_free(struct dst_entry *dst); struct dst_entry *dst_destroy(struct dst_entry *dst); - -static inline void dst_free(struct dst_entry *dst) -{ - if (dst->obsolete > 0) - return; - if (!atomic_read(&dst->__refcnt)) { - dst = dst_destroy(dst); - if (!dst) - return; - } - __dst_free(dst); -} - -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} +void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) { - dst->pending_confirm = 1; -} - -static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, - struct sk_buff *skb) -{ - const struct hh_cache *hh; - - if (dst->pending_confirm) { - unsigned long now = jiffies; - - dst->pending_confirm = 0; - /* avoid dirtying neighbour */ - if (n->confirmed != now) - n->confirmed = now; - } - - hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) - return neigh_hh_output(hh, skb); - else - return n->output(n, skb); } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) @@ -477,6 +440,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst return IS_ERR(n) ? NULL : n; } +static inline void dst_confirm_neigh(const struct dst_entry *dst, + const void *daddr) +{ + if (dst->ops->confirm_neigh) + dst->ops->confirm_neigh(dst, daddr); +} + static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -514,8 +484,6 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) return dst; } -void dst_subsys_init(void); - /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_ICMP = 1 << 0, diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 701fc814d0af..a803129a4849 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -5,10 +5,22 @@ #include <net/ip_tunnels.h> #include <net/dst.h> +enum metadata_type { + METADATA_IP_TUNNEL, + METADATA_HW_PORT_MUX, +}; + +struct hw_port_info { + struct net_device *lower_dev; + u32 port_id; +}; + struct metadata_dst { struct dst_entry dst; + enum metadata_type type; union { struct ip_tunnel_info tun_info; + struct hw_port_info port_info; } u; }; @@ -27,7 +39,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; - if (md_dst) + if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); @@ -55,22 +67,33 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); - if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len) + if (!a != !b || a->type != b->type) return 1; - return memcmp(&a->u.tun_info, &b->u.tun_info, - sizeof(a->u.tun_info) + a->u.tun_info.options_len); + switch (a->type) { + case METADATA_HW_PORT_MUX: + return memcmp(&a->u.port_info, &b->u.port_info, + sizeof(a->u.port_info)); + case METADATA_IP_TUNNEL: + return memcmp(&a->u.tun_info, &b->u.tun_info, + sizeof(a->u.tun_info) + + a->u.tun_info.options_len); + default: + return 1; + } } void metadata_dst_free(struct metadata_dst *); -struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); -struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, + gfp_t flags); +struct metadata_dst __percpu * +metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; - tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; @@ -85,11 +108,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) int md_size; struct metadata_dst *new_md; - if (!md_dst) + if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; - new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index a0d443ca16fc..c84b3287e38b 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -33,6 +33,8 @@ struct dst_ops { struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); + void (*confirm_neigh)(const struct dst_entry *dst, + const void *daddr); struct kmem_cache *kmem_cachep; @@ -46,19 +48,12 @@ static inline int dst_entries_get_fast(struct dst_ops *dst) static inline int dst_entries_get_slow(struct dst_ops *dst) { - int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&dst->pcpuc_entries); - local_bh_enable(); - return res; + return percpu_counter_sum_positive(&dst->pcpuc_entries); } static inline void dst_entries_add(struct dst_ops *dst, int val) { - local_bh_disable(); percpu_counter_add(&dst->pcpuc_entries, val); - local_bh_enable(); } static inline int dst_entries_init(struct dst_ops *dst) diff --git a/include/net/esp.h b/include/net/esp.h index a43be85aedc4..c41994d1bfef 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -10,4 +10,23 @@ static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) return (struct ip_esp_hdr *)skb_transport_header(skb); } +struct esp_info { + struct ip_esp_hdr *esph; + __be64 seqno; + int tfclen; + int tailen; + int plen; + int clen; + int len; + int nfrags; + __u8 proto; + bool inplace; +}; + +int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp_input_done2(struct sk_buff *skb, int err); +int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp6_input_done2(struct sk_buff *skb, int err); #endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 8dbfdf728cd8..c487bfa2f479 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> +#include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> @@ -29,7 +30,7 @@ struct fib_rule { struct fib_rule __rcu *ctarget; struct net *fr_net; - atomic_t refcnt; + refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; @@ -103,12 +104,12 @@ struct fib_rules_ops { static inline void fib_rule_get(struct fib_rule *rule) { - atomic_inc(&rule->refcnt); + refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { - if (atomic_dec_and_test(&rule->refcnt)) + if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } @@ -141,7 +142,10 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); +bool fib_rule_matchall(const struct fib_rule *rule); -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); #endif diff --git a/include/net/flow.h b/include/net/flow.h index 6984f1913dc1..bae198b3039e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -202,7 +202,7 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) typedef unsigned long flow_compare_t; -static inline size_t flow_key_size(u16 family) +static inline unsigned int flow_key_size(u16 family) { switch (family) { case AF_INET: diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d896a33e00d4..e2663e900b0a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -41,6 +41,13 @@ struct flow_dissector_key_vlan { u16 padding; }; +struct flow_dissector_key_mpls { + u32 mpls_ttl:8, + mpls_bos:1, + mpls_tc:3, + mpls_label:20; +}; + struct flow_dissector_key_keyid { __be32 keyid; }; @@ -89,6 +96,24 @@ struct flow_dissector_key_addrs { }; /** + * flow_dissector_key_arp: + * @ports: Operation, source and target addresses for an ARP header + * for Ethernet hardware addresses and IPv4 protocol addresses + * sip: Sender IP address + * tip: Target IP address + * op: Operation + * sha: Sender hardware address + * tpa: Target hardware address + */ +struct flow_dissector_key_arp { + __u32 sip; + __u32 tip; + __u8 op; + unsigned char sha[ETH_ALEN]; + unsigned char tha[ETH_ALEN]; +}; + +/** * flow_dissector_key_tp_ports: * @ports: port numbers of Transport header * src: source port number @@ -132,6 +157,24 @@ struct flow_dissector_key_eth_addrs { unsigned char src[ETH_ALEN]; }; +/** + * struct flow_dissector_key_tcp: + * @flags: flags + */ +struct flow_dissector_key_tcp { + __be16 flags; +}; + +/** + * struct flow_dissector_key_ip: + * @tos: tos + * @ttl: ttl + */ +struct flow_dissector_key_ip { + __u8 tos; + __u8 ttl; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -141,6 +184,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ + FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ @@ -150,6 +194,9 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ + FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ + FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flowcache.h b/include/net/flowcache.h index 9caf3bfc8d2d..51eb971e8973 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -8,7 +8,7 @@ struct flow_cache_percpu { struct hlist_head *hash_table; - int hash_count; + unsigned int hash_count; u32 hash_rnd; int hash_rnd_recalc; struct tasklet_struct flush_tasklet; @@ -18,8 +18,8 @@ struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; struct hlist_node node; - int low_watermark; - int high_watermark; + unsigned int low_watermark; + unsigned int high_watermark; struct timer_list rnd_timer; }; #endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a34275be3600..c59a098221db 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -84,6 +84,7 @@ struct nlattr **genl_family_attrbuf(const struct genl_family *family); * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers + * @extack: extended ACK report struct */ struct genl_info { u32 snd_seq; @@ -94,6 +95,7 @@ struct genl_info { struct nlattr ** attrs; possible_net_t _net; void * user_ptr[2]; + struct netlink_ext_ack *extack; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -106,6 +108,16 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) write_pnet(&info->_net, net); } +#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) + +static inline int genl_err_attr(struct genl_info *info, int err, + struct nlattr *attr) +{ + info->extack->bad_attr = attr; + + return err; +} + /** * struct genl_ops - generic netlink operations * @cmd: command identifier @@ -116,7 +128,6 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps - * @ops_list: operations list */ struct genl_ops { const struct nla_policy *policy; @@ -162,14 +173,16 @@ genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy - * */ + * @extack: extended ACK report struct + */ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, - policy); + policy, extack); } /** diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index 2a1abbf8da74..fcaf8f479130 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -5,92 +5,14 @@ #include <linux/slab.h> #include <linux/netdevice.h> -struct gro_cell { - struct sk_buff_head napi_skbs; - struct napi_struct napi; -}; +struct gro_cell; struct gro_cells { struct gro_cell __percpu *cells; }; -static inline int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) -{ - struct gro_cell *cell; - struct net_device *dev = skb->dev; - - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) - return netif_rx(skb); - - cell = this_cpu_ptr(gcells->cells); - - if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - __skb_queue_tail(&cell->napi_skbs, skb); - if (skb_queue_len(&cell->napi_skbs) == 1) - napi_schedule(&cell->napi); - return NET_RX_SUCCESS; -} - -/* called under BH context */ -static inline int gro_cell_poll(struct napi_struct *napi, int budget) -{ - struct gro_cell *cell = container_of(napi, struct gro_cell, napi); - struct sk_buff *skb; - int work_done = 0; - - while (work_done < budget) { - skb = __skb_dequeue(&cell->napi_skbs); - if (!skb) - break; - napi_gro_receive(napi, skb); - work_done++; - } - - if (work_done < budget) - napi_complete_done(napi, work_done); - return work_done; -} - -static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) -{ - int i; - - gcells->cells = alloc_percpu(struct gro_cell); - if (!gcells->cells) - return -ENOMEM; - - for_each_possible_cpu(i) { - struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); - - __skb_queue_head_init(&cell->napi_skbs); - - set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); - - netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); - napi_enable(&cell->napi); - } - return 0; -} - -static inline void gro_cells_destroy(struct gro_cells *gcells) -{ - int i; - - if (!gcells->cells) - return; - for_each_possible_cpu(i) { - struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); - - netif_napi_del(&cell->napi); - __skb_queue_purge(&cell->napi_skbs); - } - free_percpu(gcells->cells); - gcells->cells = NULL; -} +int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb); +int gro_cells_init(struct gro_cells *gcells, struct net_device *dev); +void gro_cells_destroy(struct gro_cells *gcells); #endif diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index d0e7e3f8e67a..d91f9e7f4d71 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,201 +1,54 @@ /* - * Copyright (c) 2003, 2004 David Young. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of David Young may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID - * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. - */ - -/* - * Modifications to fit into the linux IEEE 802.11 stack, - * Mike Kershaw ([email protected]) + * Copyright (c) 2017 Intel Deutschland GmbH + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifndef __RADIOTAP_H +#define __RADIOTAP_H -#ifndef IEEE80211RADIOTAP_H -#define IEEE80211RADIOTAP_H - -#include <linux/if_ether.h> #include <linux/kernel.h> #include <asm/unaligned.h> -/* Base version of the radiotap packet header data */ -#define PKTHDR_RADIOTAP_VERSION 0 - -/* A generic radio capture format is desirable. There is one for - * Linux, but it is neither rigidly defined (there were not even - * units given for some fields) nor easily extensible. - * - * I suggest the following extensible radio capture format. It is - * based on a bitmap indicating which fields are present. - * - * I am trying to describe precisely what the application programmer - * should expect in the following, and for that reason I tell the - * units and origin of each measurement (where it applies), or else I - * use sufficiently weaselly language ("is a monotonically nondecreasing - * function of...") that I cannot set false expectations for lawyerly - * readers. - */ - -/* - * The radio capture header precedes the 802.11 header. - * All data in the header is little endian on all platforms. +/** + * struct ieee82011_radiotap_header - base radiotap header */ struct ieee80211_radiotap_header { - u8 it_version; /* Version 0. Only increases - * for drastic changes, - * introduction of compatible - * new fields does not count. - */ - u8 it_pad; - __le16 it_len; /* length of the whole - * header in bytes, including - * it_version, it_pad, - * it_len, and data fields. - */ - __le32 it_present; /* A bitmap telling which - * fields are present. Set bit 31 - * (0x80000000) to extend the - * bitmap by another 32 bits. - * Additional extensions are made - * by setting bit 31. - */ + /** + * @it_version: radiotap version, always 0 + */ + uint8_t it_version; + + /** + * @it_pad: padding (or alignment) + */ + uint8_t it_pad; + + /** + * @it_len: overall radiotap header length + */ + __le16 it_len; + + /** + * @it_present: (first) present word + */ + __le32 it_present; } __packed; -/* Name Data type Units - * ---- --------- ----- - * - * IEEE80211_RADIOTAP_TSFT __le64 microseconds - * - * Value in microseconds of the MAC's 64-bit 802.11 Time - * Synchronization Function timer when the first bit of the - * MPDU arrived at the MAC. For received frames, only. - * - * IEEE80211_RADIOTAP_CHANNEL 2 x __le16 MHz, bitmap - * - * Tx/Rx frequency in MHz, followed by flags (see below). - * - * IEEE80211_RADIOTAP_FHSS __le16 see below - * - * For frequency-hopping radios, the hop set (first byte) - * and pattern (second byte). - * - * IEEE80211_RADIOTAP_RATE u8 500kb/s - * - * Tx/Rx data rate - * - * IEEE80211_RADIOTAP_DBM_ANTSIGNAL s8 decibels from - * one milliwatt (dBm) - * - * RF signal power at the antenna, decibel difference from - * one milliwatt. - * - * IEEE80211_RADIOTAP_DBM_ANTNOISE s8 decibels from - * one milliwatt (dBm) - * - * RF noise power at the antenna, decibel difference from one - * milliwatt. - * - * IEEE80211_RADIOTAP_DB_ANTSIGNAL u8 decibel (dB) - * - * RF signal power at the antenna, decibel difference from an - * arbitrary, fixed reference. - * - * IEEE80211_RADIOTAP_DB_ANTNOISE u8 decibel (dB) - * - * RF noise power at the antenna, decibel difference from an - * arbitrary, fixed reference point. - * - * IEEE80211_RADIOTAP_LOCK_QUALITY __le16 unitless - * - * Quality of Barker code lock. Unitless. Monotonically - * nondecreasing with "better" lock strength. Called "Signal - * Quality" in datasheets. (Is there a standard way to measure - * this?) - * - * IEEE80211_RADIOTAP_TX_ATTENUATION __le16 unitless - * - * Transmit power expressed as unitless distance from max - * power set at factory calibration. 0 is max power. - * Monotonically nondecreasing with lower power levels. - * - * IEEE80211_RADIOTAP_DB_TX_ATTENUATION __le16 decibels (dB) - * - * Transmit power expressed as decibel distance from max power - * set at factory calibration. 0 is max power. Monotonically - * nondecreasing with lower power levels. - * - * IEEE80211_RADIOTAP_DBM_TX_POWER s8 decibels from - * one milliwatt (dBm) - * - * Transmit power expressed as dBm (decibels from a 1 milliwatt - * reference). This is the absolute power level measured at - * the antenna port. - * - * IEEE80211_RADIOTAP_FLAGS u8 bitmap - * - * Properties of transmitted and received frames. See flags - * defined below. - * - * IEEE80211_RADIOTAP_ANTENNA u8 antenna index - * - * Unitless indication of the Rx/Tx antenna for this packet. - * The first antenna is antenna 0. - * - * IEEE80211_RADIOTAP_RX_FLAGS __le16 bitmap - * - * Properties of received frames. See flags defined below. - * - * IEEE80211_RADIOTAP_TX_FLAGS __le16 bitmap - * - * Properties of transmitted frames. See flags defined below. - * - * IEEE80211_RADIOTAP_RTS_RETRIES u8 data - * - * Number of rts retries a transmitted frame used. - * - * IEEE80211_RADIOTAP_DATA_RETRIES u8 data - * - * Number of unicast retries a transmitted frame used. - * - * IEEE80211_RADIOTAP_MCS u8, u8, u8 unitless - * - * Contains a bitmap of known fields/flags, the flags, and - * the MCS index. - * - * IEEE80211_RADIOTAP_AMPDU_STATUS u32, u16, u8, u8 unitless - * - * Contains the AMPDU information for the subframe. - * - * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 - * - * Contains VHT information about this frame. - * - * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable - * - * Contains timestamp information for this frame. - */ -enum ieee80211_radiotap_type { +/* version is always 0 */ +#define PKTHDR_RADIOTAP_VERSION 0 + +/* see the radiotap website for the descriptions */ +enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_TSFT = 0, IEEE80211_RADIOTAP_FLAGS = 1, IEEE80211_RADIOTAP_RATE = 2, @@ -214,7 +67,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TX_FLAGS = 15, IEEE80211_RADIOTAP_RTS_RETRIES = 16, IEEE80211_RADIOTAP_DATA_RETRIES = 17, - + /* 18 is XChannel, but it's not defined yet */ IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, @@ -226,129 +79,135 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_EXT = 31 }; -/* Channel flags. */ -#define IEEE80211_CHAN_TURBO 0x0010 /* Turbo channel */ -#define IEEE80211_CHAN_CCK 0x0020 /* CCK channel */ -#define IEEE80211_CHAN_OFDM 0x0040 /* OFDM channel */ -#define IEEE80211_CHAN_2GHZ 0x0080 /* 2 GHz spectrum channel. */ -#define IEEE80211_CHAN_5GHZ 0x0100 /* 5 GHz spectrum channel */ -#define IEEE80211_CHAN_PASSIVE 0x0200 /* Only passive scan allowed */ -#define IEEE80211_CHAN_DYN 0x0400 /* Dynamic CCK-OFDM channel */ -#define IEEE80211_CHAN_GFSK 0x0800 /* GFSK channel (FHSS PHY) */ -#define IEEE80211_CHAN_GSM 0x1000 /* GSM (900 MHz) */ -#define IEEE80211_CHAN_STURBO 0x2000 /* Static Turbo */ -#define IEEE80211_CHAN_HALF 0x4000 /* Half channel (10 MHz wide) */ -#define IEEE80211_CHAN_QUARTER 0x8000 /* Quarter channel (5 MHz wide) */ - -/* For IEEE80211_RADIOTAP_FLAGS */ -#define IEEE80211_RADIOTAP_F_CFP 0x01 /* sent/received - * during CFP - */ -#define IEEE80211_RADIOTAP_F_SHORTPRE 0x02 /* sent/received - * with short - * preamble - */ -#define IEEE80211_RADIOTAP_F_WEP 0x04 /* sent/received - * with WEP encryption - */ -#define IEEE80211_RADIOTAP_F_FRAG 0x08 /* sent/received - * with fragmentation - */ -#define IEEE80211_RADIOTAP_F_FCS 0x10 /* frame includes FCS */ -#define IEEE80211_RADIOTAP_F_DATAPAD 0x20 /* frame has padding between - * 802.11 header and payload - * (to 32-bit boundary) - */ -#define IEEE80211_RADIOTAP_F_BADFCS 0x40 /* bad FCS */ - -/* For IEEE80211_RADIOTAP_RX_FLAGS */ -#define IEEE80211_RADIOTAP_F_RX_BADPLCP 0x0002 /* frame has bad PLCP */ +/* for IEEE80211_RADIOTAP_FLAGS */ +enum ieee80211_radiotap_flags { + IEEE80211_RADIOTAP_F_CFP = 0x01, + IEEE80211_RADIOTAP_F_SHORTPRE = 0x02, + IEEE80211_RADIOTAP_F_WEP = 0x04, + IEEE80211_RADIOTAP_F_FRAG = 0x08, + IEEE80211_RADIOTAP_F_FCS = 0x10, + IEEE80211_RADIOTAP_F_DATAPAD = 0x20, + IEEE80211_RADIOTAP_F_BADFCS = 0x40, +}; -/* For IEEE80211_RADIOTAP_TX_FLAGS */ -#define IEEE80211_RADIOTAP_F_TX_FAIL 0x0001 /* failed due to excessive - * retries */ -#define IEEE80211_RADIOTAP_F_TX_CTS 0x0002 /* used cts 'protection' */ -#define IEEE80211_RADIOTAP_F_TX_RTS 0x0004 /* used rts/cts handshake */ -#define IEEE80211_RADIOTAP_F_TX_NOACK 0x0008 /* don't expect an ack */ +/* for IEEE80211_RADIOTAP_CHANNEL */ +enum ieee80211_radiotap_channel_flags { + IEEE80211_CHAN_CCK = 0x0020, + IEEE80211_CHAN_OFDM = 0x0040, + IEEE80211_CHAN_2GHZ = 0x0080, + IEEE80211_CHAN_5GHZ = 0x0100, + IEEE80211_CHAN_DYN = 0x0400, + IEEE80211_CHAN_HALF = 0x4000, + IEEE80211_CHAN_QUARTER = 0x8000, +}; +/* for IEEE80211_RADIOTAP_RX_FLAGS */ +enum ieee80211_radiotap_rx_flags { + IEEE80211_RADIOTAP_F_RX_BADPLCP = 0x0002, +}; -/* For IEEE80211_RADIOTAP_MCS */ -#define IEEE80211_RADIOTAP_MCS_HAVE_BW 0x01 -#define IEEE80211_RADIOTAP_MCS_HAVE_MCS 0x02 -#define IEEE80211_RADIOTAP_MCS_HAVE_GI 0x04 -#define IEEE80211_RADIOTAP_MCS_HAVE_FMT 0x08 -#define IEEE80211_RADIOTAP_MCS_HAVE_FEC 0x10 -#define IEEE80211_RADIOTAP_MCS_HAVE_STBC 0x20 +/* for IEEE80211_RADIOTAP_TX_FLAGS */ +enum ieee80211_radiotap_tx_flags { + IEEE80211_RADIOTAP_F_TX_FAIL = 0x0001, + IEEE80211_RADIOTAP_F_TX_CTS = 0x0002, + IEEE80211_RADIOTAP_F_TX_RTS = 0x0004, + IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008, +}; -#define IEEE80211_RADIOTAP_MCS_BW_MASK 0x03 -#define IEEE80211_RADIOTAP_MCS_BW_20 0 -#define IEEE80211_RADIOTAP_MCS_BW_40 1 -#define IEEE80211_RADIOTAP_MCS_BW_20L 2 -#define IEEE80211_RADIOTAP_MCS_BW_20U 3 -#define IEEE80211_RADIOTAP_MCS_SGI 0x04 -#define IEEE80211_RADIOTAP_MCS_FMT_GF 0x08 -#define IEEE80211_RADIOTAP_MCS_FEC_LDPC 0x10 -#define IEEE80211_RADIOTAP_MCS_STBC_MASK 0x60 -#define IEEE80211_RADIOTAP_MCS_STBC_1 1 -#define IEEE80211_RADIOTAP_MCS_STBC_2 2 -#define IEEE80211_RADIOTAP_MCS_STBC_3 3 +/* for IEEE80211_RADIOTAP_MCS "have" flags */ +enum ieee80211_radiotap_mcs_have { + IEEE80211_RADIOTAP_MCS_HAVE_BW = 0x01, + IEEE80211_RADIOTAP_MCS_HAVE_MCS = 0x02, + IEEE80211_RADIOTAP_MCS_HAVE_GI = 0x04, + IEEE80211_RADIOTAP_MCS_HAVE_FMT = 0x08, + IEEE80211_RADIOTAP_MCS_HAVE_FEC = 0x10, + IEEE80211_RADIOTAP_MCS_HAVE_STBC = 0x20, +}; -#define IEEE80211_RADIOTAP_MCS_STBC_SHIFT 5 +enum ieee80211_radiotap_mcs_flags { + IEEE80211_RADIOTAP_MCS_BW_MASK = 0x03, + IEEE80211_RADIOTAP_MCS_BW_20 = 0, + IEEE80211_RADIOTAP_MCS_BW_40 = 1, + IEEE80211_RADIOTAP_MCS_BW_20L = 2, + IEEE80211_RADIOTAP_MCS_BW_20U = 3, + + IEEE80211_RADIOTAP_MCS_SGI = 0x04, + IEEE80211_RADIOTAP_MCS_FMT_GF = 0x08, + IEEE80211_RADIOTAP_MCS_FEC_LDPC = 0x10, + IEEE80211_RADIOTAP_MCS_STBC_MASK = 0x60, + IEEE80211_RADIOTAP_MCS_STBC_1 = 1, + IEEE80211_RADIOTAP_MCS_STBC_2 = 2, + IEEE80211_RADIOTAP_MCS_STBC_3 = 3, + IEEE80211_RADIOTAP_MCS_STBC_SHIFT = 5, +}; -/* For IEEE80211_RADIOTAP_AMPDU_STATUS */ -#define IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN 0x0001 -#define IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN 0x0002 -#define IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN 0x0004 -#define IEEE80211_RADIOTAP_AMPDU_IS_LAST 0x0008 -#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR 0x0010 -#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN 0x0020 +/* for IEEE80211_RADIOTAP_AMPDU_STATUS */ +enum ieee80211_radiotap_ampdu_flags { + IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN = 0x0001, + IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN = 0x0002, + IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN = 0x0004, + IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008, + IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010, + IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020, +}; -/* For IEEE80211_RADIOTAP_VHT */ -#define IEEE80211_RADIOTAP_VHT_KNOWN_STBC 0x0001 -#define IEEE80211_RADIOTAP_VHT_KNOWN_TXOP_PS_NA 0x0002 -#define IEEE80211_RADIOTAP_VHT_KNOWN_GI 0x0004 -#define IEEE80211_RADIOTAP_VHT_KNOWN_SGI_NSYM_DIS 0x0008 -#define IEEE80211_RADIOTAP_VHT_KNOWN_LDPC_EXTRA_OFDM_SYM 0x0010 -#define IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED 0x0020 -#define IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH 0x0040 -#define IEEE80211_RADIOTAP_VHT_KNOWN_GROUP_ID 0x0080 -#define IEEE80211_RADIOTAP_VHT_KNOWN_PARTIAL_AID 0x0100 +/* for IEEE80211_RADIOTAP_VHT */ +enum ieee80211_radiotap_vht_known { + IEEE80211_RADIOTAP_VHT_KNOWN_STBC = 0x0001, + IEEE80211_RADIOTAP_VHT_KNOWN_TXOP_PS_NA = 0x0002, + IEEE80211_RADIOTAP_VHT_KNOWN_GI = 0x0004, + IEEE80211_RADIOTAP_VHT_KNOWN_SGI_NSYM_DIS = 0x0008, + IEEE80211_RADIOTAP_VHT_KNOWN_LDPC_EXTRA_OFDM_SYM = 0x0010, + IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED = 0x0020, + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH = 0x0040, + IEEE80211_RADIOTAP_VHT_KNOWN_GROUP_ID = 0x0080, + IEEE80211_RADIOTAP_VHT_KNOWN_PARTIAL_AID = 0x0100, +}; -#define IEEE80211_RADIOTAP_VHT_FLAG_STBC 0x01 -#define IEEE80211_RADIOTAP_VHT_FLAG_TXOP_PS_NA 0x02 -#define IEEE80211_RADIOTAP_VHT_FLAG_SGI 0x04 -#define IEEE80211_RADIOTAP_VHT_FLAG_SGI_NSYM_M10_9 0x08 -#define IEEE80211_RADIOTAP_VHT_FLAG_LDPC_EXTRA_OFDM_SYM 0x10 -#define IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED 0x20 +enum ieee80211_radiotap_vht_flags { + IEEE80211_RADIOTAP_VHT_FLAG_STBC = 0x01, + IEEE80211_RADIOTAP_VHT_FLAG_TXOP_PS_NA = 0x02, + IEEE80211_RADIOTAP_VHT_FLAG_SGI = 0x04, + IEEE80211_RADIOTAP_VHT_FLAG_SGI_NSYM_M10_9 = 0x08, + IEEE80211_RADIOTAP_VHT_FLAG_LDPC_EXTRA_OFDM_SYM = 0x10, + IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED = 0x20, +}; -#define IEEE80211_RADIOTAP_CODING_LDPC_USER0 0x01 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER1 0x02 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 -#define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +enum ieee80211_radiotap_vht_coding { + IEEE80211_RADIOTAP_CODING_LDPC_USER0 = 0x01, + IEEE80211_RADIOTAP_CODING_LDPC_USER1 = 0x02, + IEEE80211_RADIOTAP_CODING_LDPC_USER2 = 0x04, + IEEE80211_RADIOTAP_CODING_LDPC_USER3 = 0x08, +}; -/* For IEEE80211_RADIOTAP_TIMESTAMP */ -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 -#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 +/* for IEEE80211_RADIOTAP_TIMESTAMP */ +enum ieee80211_radiotap_timestamp_unit_spos { + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK = 0x000F, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS = 0x0000, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US = 0x0001, + IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS = 0x0003, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK = 0x00F0, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU = 0x0000, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ = 0x0010, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU = 0x0020, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU = 0x0030, + IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN = 0x00F0, +}; -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 -#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 +enum ieee80211_radiotap_timestamp_flags { + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT = 0x00, + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT = 0x01, + IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY = 0x02, +}; -/* helpers */ -static inline int ieee80211_get_radiotap_len(unsigned char *data) +/** + * ieee80211_get_radiotap_len - get radiotap header length + */ +static inline u16 ieee80211_get_radiotap_len(const char *data) { - struct ieee80211_radiotap_header *hdr = - (struct ieee80211_radiotap_header *)data; + struct ieee80211_radiotap_header *hdr = (void *)data; return get_unaligned_le16(&hdr->it_len); } -#endif /* IEEE80211_RADIOTAP_H */ +#endif /* __RADIOTAP_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 0fa4c324b713..d4088d1a688d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -17,6 +17,7 @@ #include <net/snmp.h> #include <linux/ipv6.h> +#include <linux/refcount.h> /* inet6_dev.if_flags */ @@ -45,7 +46,7 @@ struct inet6_ifaddr { /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; int state; @@ -126,7 +127,7 @@ struct ifmcaddr6 { struct timer_list mca_timer; unsigned int mca_flags; int mca_users; - atomic_t mca_refcnt; + refcount_t mca_refcnt; spinlock_t mca_lock; unsigned long mca_cstamp; unsigned long mca_tstamp; @@ -146,7 +147,7 @@ struct ifacaddr6 { struct rt6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; - atomic_t aca_refcnt; + refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; }; @@ -187,7 +188,7 @@ struct inet6_dev { struct ifacaddr6 *ac_list; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; __u32 if_flags; int dead; @@ -205,7 +206,6 @@ struct inet6_dev { __s32 rs_interval; /* in jiffies */ __u8 rs_probes; - __u8 addr_gen_mode; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; }; diff --git a/include/net/ife.h b/include/net/ife.h new file mode 100644 index 000000000000..2d87d6898b0a --- /dev/null +++ b/include/net/ife.h @@ -0,0 +1,51 @@ +#ifndef __NET_IFE_H +#define __NET_IFE_H + +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/module.h> +#include <uapi/linux/ife.h> + +#if IS_ENABLED(CONFIG_NET_IFE) + +void *ife_encode(struct sk_buff *skb, u16 metalen); +void *ife_decode(struct sk_buff *skb, u16 *metalen); + +void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen); +int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, + const void *dval); + +void *ife_tlv_meta_next(void *skbdata); + +#else + +static inline void *ife_encode(struct sk_buff *skb, u16 metalen) +{ + return NULL; +} + +static inline void *ife_decode(struct sk_buff *skb, u16 *metalen) +{ + return NULL; +} + +static inline void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, + u16 *totlen) +{ + return NULL; +} + +static inline int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, + const void *dval) +{ + return 0; +} + +static inline void *ife_tlv_meta_next(void *skbdata) +{ + return NULL; +} + +#endif + +#endif /* __NET_IFE_H */ diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 3212b39b5bfc..8ec87b62257b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,16 +15,11 @@ #include <linux/types.h> -struct inet_bind_bucket; struct request_sock; struct sk_buff; struct sock; struct sockaddr; -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool soreuseport_ok); - struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5d683428fced..f39ae697347f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -17,10 +17,11 @@ int inet_release(struct socket *sock); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags); + int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -int inet_accept(struct socket *sock, struct socket *newsock, int flags); +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 85ee3879499e..13e4c89a8231 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -62,9 +62,6 @@ struct inet_connection_sock_af_ops { char __user *optval, int __user *optlen); #endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); - int (*bind_conflict)(const struct sock *sk, - const struct inet_bind_bucket *tb, - bool relax, bool soreuseport_ok); void (*mtu_reduced)(struct sock *sk); }; @@ -78,6 +75,8 @@ struct inet_connection_sock_af_ops { * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific + * @icsk_ulp_ops Pluggable ULP control hook + * @icsk_ulp_data ULP private data * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -100,6 +99,8 @@ struct inet_connection_sock { __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; + const struct tcp_ulp_ops *icsk_ulp_ops; + void *icsk_ulp_data; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, @@ -144,6 +145,7 @@ struct inet_connection_sock { #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ #define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ +#define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { @@ -234,7 +236,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) { + what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || + what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); @@ -259,11 +262,8 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, return (unsigned long)min_t(u64, when, max_when); } -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); -int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax, - bool soreuseport_ok); int inet_csk_get_port(struct sock *sk, unsigned short snum); struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 909972aa3acd..6fdcd2427776 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,7 +50,7 @@ struct inet_frag_queue { spinlock_t lock; struct timer_list timer; struct hlist_node list; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; ktime_t stamp; @@ -92,7 +92,7 @@ struct inet_frags { */ u32 rnd; seqlock_t rnd_seqlock; - int qsize; + unsigned int qsize; unsigned int (*hashfn)(const struct inet_frag_queue *); bool (*match)(const struct inet_frag_queue *q, @@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { - if (atomic_dec_and_test(&q->refcnt)) + if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q, f); } @@ -154,23 +154,17 @@ static inline int frag_mem_limit(struct netns_frags *nf) static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) { - unsigned int res; - - local_bh_disable(); - res = percpu_counter_sum_positive(&nf->mem); - local_bh_enable(); - - return res; + return percpu_counter_sum_positive(&nf->mem); } /* RFC 3168 support : diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 0574493e3899..5026b1f08bb8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -32,7 +32,7 @@ #include <net/tcp_states.h> #include <net/netns/hash.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/byteorder.h> /* This is for all connections with a full identity, no wildcards. @@ -74,13 +74,21 @@ struct inet_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */ +#define FASTREUSEPORT_ANY 1 +#define FASTREUSEPORT_STRICT 2 + struct inet_bind_bucket { possible_net_t ib_net; unsigned short port; signed char fastreuse; signed char fastreuseport; kuid_t fastuid; - int num_owners; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr fast_v6_rcv_saddr; +#endif + __be32 fast_rcv_saddr; + unsigned short fast_sk_family; + bool fast_ipv6_only; struct hlist_node node; struct hlist_head owners; }; @@ -203,10 +211,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); -int __inet_hash(struct sock *sk, struct sock *osk, - int (*saddr_same)(const struct sock *sk1, - const struct sock *sk2, - bool match_wildcard)); +int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); @@ -329,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -354,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, refcounted); } -u32 sk_ehashfn(const struct sock *sk); u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c9cff977a7fb..aa95053dfc78 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -206,7 +206,11 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; - __u8 bind_address_no_port:1; + __u8 bind_address_no_port:1, + defer_connect:1; /* Indicates that fastopen_connect is set + * and cookie exists so we defer connect + * until first data frame is written + */ __u8 rcv_tos; __u8 convert_csum; int uc_index; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c9b3eb70f340..6a75d67a30fd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -29,16 +29,6 @@ #include <linux/atomic.h> -struct inet_hashinfo; - -struct inet_timewait_death_row { - atomic_t tw_count; - - struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; - int sysctl_tw_recycle; - int sysctl_max_tw_buckets; -}; - struct inet_bind_bucket; /* @@ -125,8 +115,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo void inet_twsk_deschedule_put(struct inet_timewait_sock *tw); -void inet_twsk_purge(struct inet_hashinfo *hashinfo, - struct inet_timewait_death_row *twdr, int family); +void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family); static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 235c7811a86a..f2a215fc78e4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,7 +46,7 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following field + * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ @@ -60,7 +60,7 @@ struct inet_peer { /* following fields might be frequently dirtied */ __u32 dtime; /* the time of last use of not referenced entries */ - atomic_t refcnt; + refcount_t refcnt; }; struct inet_peer_base { diff --git a/include/net/ip.h b/include/net/ip.h index ab6761a7c883..821cedcc8e73 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,8 @@ #include <net/flow.h> #include <net/flow_dissector.h> +#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ + struct sock; struct inet_skb_parm { @@ -263,11 +265,21 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } +static inline int inet_prot_sock(struct net *net) +{ + return net->ipv4.sysctl_ip_prot_sock; +} + #else static inline int inet_is_local_reserved_port(struct net *net, int port) { return 0; } + +static inline int inet_prot_sock(struct net *net) +{ + return PROT_SOCK; +} #endif __be32 inet_current_timestamp(void); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a74e2aa40ef4..1a88008cc6f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -37,7 +37,9 @@ struct fib6_config { int fc_ifindex; u32 fc_flags; u32 fc_protocol; - u32 fc_type; /* only 8 bits are used */ + u16 fc_type; /* only 8 bits are used */ + u16 fc_delete_all_nh : 1, + __unused : 15; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -168,7 +170,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_PCPU || - (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; @@ -275,7 +277,8 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc); + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9dc2c182a263..199056933dcb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -22,6 +22,7 @@ struct route_info { #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> +#include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> @@ -84,12 +85,13 @@ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, int flags); +void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg); +int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); @@ -115,7 +117,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -int icmp6_dst_gc(void); void fib6_force_start_gc(struct net *net); @@ -232,4 +233,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } +static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +{ + return a->dst.dev == b->dst.dev && + a->rt6i_idev == b->rt6i_idev && + ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && + !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); +} #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 1b1cf33cbfb0..08fbc7f7d8d7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -33,6 +33,8 @@ struct __ip6_tnl_parm { __be16 o_flags; __be32 i_key; __be32 o_key; + + __u32 fwmark; }; /* IPv6 tunnel */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5f376af377c7..41d580c6185f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -23,6 +23,7 @@ #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> +#include <linux/refcount.h> struct fib_config { u8 fc_dst_len; @@ -105,7 +106,7 @@ struct fib_info { struct hlist_node fib_lhash; struct net *fib_net; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; @@ -114,11 +115,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; @@ -136,6 +137,7 @@ struct fib_rule; struct fib_table; struct fib_result { + __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; @@ -211,22 +213,47 @@ struct fib_entry_notifier_info { u8 tos; u8 type; u32 tb_id; - u32 nlflags; +}; + +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + +struct fib_nh_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_nh *fib_nh; }; enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, FIB_EVENT_ENTRY_ADD, FIB_EVENT_ENTRY_DEL, FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, }; int register_fib_notifier(struct notifier_block *nb, void (*cb)(struct notifier_block *nb)); int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); +void fib_notify(struct net *net, struct notifier_block *nb); +#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_rules_notify(struct net *net, struct notifier_block *nb); +#else +static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) +{ +} +#endif + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -238,8 +265,10 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); -int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); @@ -291,6 +320,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, return err; } +static inline bool fib4_rule_default(const struct fib_rule *rule) +{ + return true; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -335,6 +369,8 @@ out: return err; } +bool fib4_rule_default(const struct fib_rule *rule); + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -344,7 +380,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -void fib_select_default(const struct flowi4 *flp, struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { @@ -364,17 +399,13 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); -extern u32 fib_multipath_secret __read_mostly; - -static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) -{ - return jhash_2words((__force u32)saddr, (__force u32)daddr, - fib_multipath_secret) >> 1; -} - +#ifdef CONFIG_IP_ROUTE_MULTIPATH +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, + const struct sk_buff *skb); +#endif void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, int mp_hash); + struct flowi4 *fl4, const struct sk_buff *skb); /* Exported by fib_trie.c */ void fib_trie_init(void); @@ -400,12 +431,12 @@ void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e893fe43dd13..520809912f03 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -58,6 +58,7 @@ struct ip_tunnel_key { /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ +#define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ @@ -131,6 +132,7 @@ struct ip_tunnel { unsigned int prl_count; /* # of entries in PRL */ unsigned int ip_tnl_net_id; struct gro_cells gro_cells; + __u32 fwmark; bool collect_md; bool ignore_df; }; @@ -261,8 +263,8 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot); +void ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, __be16 flags, __be32 remote, __be32 local, @@ -272,9 +274,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p); + struct ip_tunnel_parm *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p); + struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); struct ip_tunnel_encap_ops { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd6018a9ee24..4f4f786255ef 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -12,6 +12,8 @@ #include <linux/list.h> /* for struct list_head */ #include <linux/spinlock.h> /* for struct rwlock_t */ #include <linux/atomic.h> /* for struct atomic_t */ +#include <linux/refcount.h> /* for struct refcount_t */ + #include <linux/compiler.h> #include <linux/timer.h> #include <linux/bug.h> @@ -525,7 +527,7 @@ struct ip_vs_conn { struct netns_ipvs *ipvs; /* counter and timer */ - atomic_t refcnt; /* reference count */ + refcount_t refcnt; /* reference count */ struct timer_list timer; /* Expiration timer */ volatile unsigned long timeout; /* timeout */ @@ -667,7 +669,7 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ - atomic_t refcnt; /* reference counter */ + refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ unsigned long idle_start; /* start time, jiffies */ @@ -1211,14 +1213,14 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, */ static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp) { - return atomic_inc_not_zero(&cp->refcnt); + return refcount_inc_not_zero(&cp->refcnt); } /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { smp_mb__before_atomic(); - atomic_dec(&cp->refcnt); + refcount_dec(&cp->refcnt); } void ip_vs_conn_put(struct ip_vs_conn *cp); void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); @@ -1347,8 +1349,6 @@ int ip_vs_protocol_init(void); void ip_vs_protocol_cleanup(void); void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); int *ip_vs_create_timeout_table(int *table, int size); -int ip_vs_set_state_timeout(int *table, int num, const char *const *names, - const char *name, int to); void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -1410,18 +1410,18 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp); static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) { - atomic_inc(&dest->refcnt); + refcount_inc(&dest->refcnt); } static inline void ip_vs_dest_put(struct ip_vs_dest *dest) { smp_mb__before_atomic(); - atomic_dec(&dest->refcnt); + refcount_dec(&dest->refcnt); } static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) { - if (atomic_dec_return(&dest->refcnt) < 0) + if (refcount_dec_and_test(&dest->refcnt)) kfree(dest); } @@ -1553,11 +1553,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct || !nf_ct_is_untracked(ct)) { - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + if (ct) { + nf_conntrack_put(&ct->ct_general); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } #endif } @@ -1616,7 +1614,7 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, if (!(cp->flags & IP_VS_CONN_F_NFCT)) return false; ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) + if (ct) return true; #endif return false; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index dbf0abba33b8..6eac5cf8f1e6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -16,6 +16,7 @@ #include <linux/ipv6.h> #include <linux/hardirq.h> #include <linux/jhash.h> +#include <linux/refcount.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> @@ -203,7 +204,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { - atomic_t refcnt; + refcount_t refcnt; /* Length of this structure */ int tot_len; @@ -265,7 +266,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { - if (!atomic_inc_not_zero(&opt->refcnt)) + if (!refcount_inc_not_zero(&opt->refcnt)) opt = NULL; else opt = rcu_pointer_handoff(opt); @@ -276,7 +277,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) static inline void txopt_put(struct ipv6_txoptions *opt) { - if (opt && atomic_dec_and_test(&opt->refcnt)) + if (opt && refcount_dec_and_test(&opt->refcnt)) kfree_rcu(opt, rcu); } @@ -1007,6 +1008,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, */ extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_sockraw_ops; struct group_source_req; struct group_filter; diff --git a/include/net/ipx.h b/include/net/ipx.h index e5cff6811b30..af32b97b5ddd 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -14,6 +14,7 @@ #include <linux/ipx.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/refcount.h> struct ipx_address { __be32 net; @@ -54,7 +55,7 @@ struct ipx_interface { /* IPX address */ __be32 if_netnum; unsigned char if_node[IPX_NODE_LEN]; - atomic_t refcnt; + refcount_t refcnt; /* physical device info */ struct net_device *if_dev; @@ -80,7 +81,7 @@ struct ipx_route { unsigned char ir_routed; unsigned char ir_router_node[IPX_NODE_LEN]; struct list_head node; /* node in ipx_routes list */ - atomic_t refcnt; + refcount_t refcnt; }; struct ipx_cb { @@ -139,7 +140,7 @@ const char *ipx_device_name(struct ipx_interface *intrfc); static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) { - atomic_inc(&intrfc->refcnt); + refcount_inc(&intrfc->refcnt); } void ipxitf_down(struct ipx_interface *intrfc); @@ -157,18 +158,18 @@ int ipxrtr_ioctl(unsigned int cmd, void __user *arg); static __inline__ void ipxitf_put(struct ipx_interface *intrfc) { - if (atomic_dec_and_test(&intrfc->refcnt)) + if (refcount_dec_and_test(&intrfc->refcnt)) ipxitf_down(intrfc); } static __inline__ void ipxrtr_hold(struct ipx_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void ipxrtr_put(struct ipx_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } #endif /* _NET_INET_IPX_H_ */ diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h index cb2615ccf761..d784f242cf7b 100644 --- a/include/net/irda/timer.h +++ b/include/net/irda/timer.h @@ -59,7 +59,7 @@ struct lap_cb; * Slot timer must never exceed 85 ms, and must always be at least 25 ms, * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overides it - Jean II) + * (Note : this is the default value and sysctl overrides it - Jean II) */ #define SLOT_TIMEOUT (90*HZ/1000) diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index e0f4109e64c6..2509728650bd 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -505,25 +505,8 @@ static inline int iwe_stream_event_len_adjust(struct iw_request_info *info, /* * Wrapper to add an Wireless Event to a stream of events. */ -static inline char * -iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends, - struct iw_event *iwe, int event_len) -{ - int lcp_len = iwe_stream_lcp_len(info); - - event_len = iwe_stream_event_len_adjust(info, event_len); - - /* Check if it's possible */ - if(likely((stream + event_len) < ends)) { - iwe->len = event_len; - /* Beware of alignement issues on 64 bits */ - memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); - memcpy(stream + lcp_len, &iwe->u, - event_len - lcp_len); - stream += event_len; - } - return stream; -} +char *iwe_stream_add_event(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, int event_len); static inline char * iwe_stream_add_event_check(struct iw_request_info *info, char *stream, @@ -541,26 +524,8 @@ iwe_stream_add_event_check(struct iw_request_info *info, char *stream, * Wrapper to add an short Wireless Event containing a pointer to a * stream of events. */ -static inline char * -iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, - struct iw_event *iwe, char *extra) -{ - int event_len = iwe_stream_point_len(info) + iwe->u.data.length; - int point_len = iwe_stream_point_len(info); - int lcp_len = iwe_stream_lcp_len(info); - - /* Check if it's possible */ - if(likely((stream + event_len) < ends)) { - iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); - memcpy(stream + lcp_len, - ((char *) &iwe->u) + IW_EV_POINT_OFF, - IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); - memcpy(stream + point_len, extra, iwe->u.data.length); - stream += event_len; - } - return stream; -} +char *iwe_stream_add_point(struct iw_request_info *info, char *stream, + char *ends, struct iw_event *iwe, char *extra); static inline char * iwe_stream_add_point_check(struct iw_request_info *info, char *stream, @@ -579,25 +544,8 @@ iwe_stream_add_point_check(struct iw_request_info *info, char *stream, * Be careful, this one is tricky to use properly : * At the first run, you need to have (value = event + IW_EV_LCP_LEN). */ -static inline char * -iwe_stream_add_value(struct iw_request_info *info, char *event, char *value, - char *ends, struct iw_event *iwe, int event_len) -{ - int lcp_len = iwe_stream_lcp_len(info); - - /* Don't duplicate LCP */ - event_len -= IW_EV_LCP_LEN; - - /* Check if it's possible */ - if(likely((value + event_len) < ends)) { - /* Add new value */ - memcpy(value, &iwe->u, event_len); - value += event_len; - /* Patch LCP */ - iwe->len = value - event; - memcpy(event, (char *) iwe, lcp_len); - } - return value; -} +char *iwe_stream_add_value(struct iw_request_info *info, char *event, + char *value, char *ends, struct iw_event *iwe, + int event_len); #endif /* _IW_HANDLER_H */ diff --git a/include/net/lapb.h b/include/net/lapb.h index 9510f8725f03..85e773742f4e 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -1,6 +1,7 @@ #ifndef _LAPB_H #define _LAPB_H #include <linux/lapb.h> +#include <linux/refcount.h> #define LAPB_HEADER_LEN 20 /* LAPB over Ethernet + a bit more */ @@ -101,7 +102,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; - atomic_t refcnt; + refcount_t refcnt; }; /* lapb_iface.c */ diff --git a/include/net/llc.h b/include/net/llc.h index e8e61d4fb458..dc35f25eb679 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -55,7 +55,7 @@ struct llc_sap { unsigned char state; unsigned char p_bit; unsigned char f_bit; - atomic_t refcnt; + refcount_t refcnt; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -113,14 +113,14 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct net_device *orig_dev)); static inline void llc_sap_hold(struct llc_sap *sap) { - atomic_inc(&sap->refcnt); + refcount_inc(&sap->refcnt); } void llc_sap_close(struct llc_sap *sap); static inline void llc_sap_put(struct llc_sap *sap) { - if (atomic_dec_and_test(&sap->refcnt)) + if (refcount_dec_and_test(&sap->refcnt)) llc_sap_close(sap); } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 73dd87647460..7c26863b8cf4 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -33,9 +33,10 @@ struct lwtunnel_state { }; struct lwtunnel_encap_ops { - int (*build_state)(struct net_device *dev, struct nlattr *encap, + int (*build_state)(struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **ts); + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); @@ -107,12 +108,15 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); -int lwtunnel_valid_encap_type(u16 encap_type); -int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); -int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack); +int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws); + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); @@ -172,19 +176,26 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type(u16 encap_type) +static inline int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack) { + NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack) { - return -EOPNOTSUPP; + /* return 0 since we are not walking attr looking for + * RTA_ENCAP_TYPE attribute on nexthops. + */ + return 0; } -static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +static inline int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws) + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5345d358a510..b2b5419467cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <[email protected]> * Copyright 2007-2010 Johannes Berg <[email protected]> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 - 2016 Intel Deutschland GmbH + * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -147,7 +147,6 @@ enum ieee80211_ac_numbers { IEEE80211_AC_BE = 2, IEEE80211_AC_BK = 3, }; -#define IEEE80211_NUM_ACS 4 /** * struct ieee80211_tx_queue_params - transmit queue configuration @@ -300,6 +299,8 @@ struct ieee80211_vif_chanctx_switch { * context had been assigned. * @BSS_CHANGED_OCB: OCB join status changed * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed + * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected + * keep alive) changed. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -326,6 +327,7 @@ enum ieee80211_bss_change { BSS_CHANGED_BANDWIDTH = 1<<21, BSS_CHANGED_OCB = 1<<22, BSS_CHANGED_MU_GROUPS = 1<<23, + BSS_CHANGED_KEEP_ALIVE = 1<<24, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -502,6 +504,10 @@ struct ieee80211_mu_group_data { * implies disabled. As with the cfg80211 callback, a change here should * cause an event to be sent indicating where the current value is in * relation to the newly configured threshold. + * @cqm_rssi_low: Connection quality monitor RSSI lower threshold, a zero value + * implies disabled. This is an alternative mechanism to the single + * threshold event and can't be enabled simultaneously with it. + * @cqm_rssi_high: Connection quality monitor RSSI upper threshold. * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The * may filter ARP queries targeted for other addresses than listed here. @@ -530,6 +536,13 @@ struct ieee80211_mu_group_data { * @allow_p2p_go_ps: indication for AP or P2P GO interface, whether it's allowed * to use P2P PS mechanism or not. AP/P2P GO is not allowed to use P2P PS * if it has associated clients without P2P PS support. + * @max_idle_period: the time period during which the station can refrain from + * transmitting frames to its associated AP without being disassociated. + * In units of 1000 TUs. Zero value indicates that the AP did not include + * a (valid) BSS Max Idle Period Element. + * @protected_keep_alive: if set, indicates that the station should send an RSN + * protected frame to the AP to reset the idle timer at the AP for the + * station. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -554,6 +567,8 @@ struct ieee80211_bss_conf { u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; + s32 cqm_rssi_low; + s32 cqm_rssi_high; struct cfg80211_chan_def chandef; struct ieee80211_mu_group_data mu_group; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; @@ -568,6 +583,8 @@ struct ieee80211_bss_conf { enum nl80211_tx_power_setting txpower_type; struct ieee80211_p2p_noa_attr p2p_noa_attr; bool allow_p2p_go_ps; + u16 max_idle_period; + bool protected_keep_alive; }; /** @@ -944,6 +961,19 @@ struct ieee80211_tx_info { }; /** + * struct ieee80211_tx_status - extended tx staus info for rate control + * + * @sta: Station that the packet was transmitted for + * @info: Basic tx status information + * @skb: Packet skb (can be NULL if not provided by the driver) + */ +struct ieee80211_tx_status { + struct ieee80211_sta *sta; + struct ieee80211_tx_info *info; + struct sk_buff *skb; +}; + +/** * struct ieee80211_scan_ies - descriptors for different blocks of IEs * * This structure is used to point to different blocks of IEs in HW scan @@ -1018,7 +1048,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware. * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame, * verification has been done by the hardware. - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this @@ -1040,16 +1070,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * (including FCS) was received. * @RX_FLAG_MACTIME_PLCP_START: The timestamp passed in the RX status (@mactime * field) is valid and contains the time the SYNC preamble was received. - * @RX_FLAG_SHORTPRE: Short preamble was used for this frame - * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index - * @RX_FLAG_VHT: VHT MCS was used and rate_index is MCS index - * @RX_FLAG_40MHZ: HT40 (40 MHz) was used - * @RX_FLAG_SHORT_GI: Short guard interval was used * @RX_FLAG_NO_SIGNAL_VAL: The signal strength value is not present. * Valid only for data frames (mainly A-MPDU) - * @RX_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, if - * the driver fills this value it should add %IEEE80211_RADIOTAP_MCS_HAVE_FMT - * to hw.radiotap_mcs_details to advertise that fact * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference * number (@ampdu_reference) must be populated and be a distinct number for * each A-MPDU @@ -1062,7 +1084,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * is stored in the @ampdu_delimiter_crc field) * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was * done by the hardware - * @RX_FLAG_LDPC: LDPC was used * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without * processing it in any regular way. * This is useful if drivers offload some frames but still want to report @@ -1071,9 +1092,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * monitor interfaces. * This is useful if drivers offload some frames but still want to report * them for sniffing purposes. - * @RX_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3 - * @RX_FLAG_10MHZ: 10 MHz (half channel) was used - * @RX_FLAG_5MHZ: 5 MHz (quarter channel) was used * @RX_FLAG_AMSDU_MORE: Some drivers may prefer to report separate A-MSDU * subframes instead of a one huge frame for performance reasons. * All, but the last MSDU from an A-MSDU should have this flag set. E.g. @@ -1089,6 +1107,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before. * This is used for AMSDU subframes which can have the same PN as * the first subframe. + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must + * be done in the hardware. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1099,49 +1119,52 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = BIT(5), RX_FLAG_FAILED_PLCP_CRC = BIT(6), RX_FLAG_MACTIME_START = BIT(7), - RX_FLAG_SHORTPRE = BIT(8), - RX_FLAG_HT = BIT(9), - RX_FLAG_40MHZ = BIT(10), - RX_FLAG_SHORT_GI = BIT(11), - RX_FLAG_NO_SIGNAL_VAL = BIT(12), - RX_FLAG_HT_GF = BIT(13), - RX_FLAG_AMPDU_DETAILS = BIT(14), - RX_FLAG_PN_VALIDATED = BIT(15), - RX_FLAG_DUP_VALIDATED = BIT(16), - RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), - RX_FLAG_AMPDU_IS_LAST = BIT(18), - RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), - RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(20), - RX_FLAG_MACTIME_END = BIT(21), - RX_FLAG_VHT = BIT(22), - RX_FLAG_LDPC = BIT(23), - RX_FLAG_ONLY_MONITOR = BIT(24), - RX_FLAG_SKIP_MONITOR = BIT(25), - RX_FLAG_STBC_MASK = BIT(26) | BIT(27), - RX_FLAG_10MHZ = BIT(28), - RX_FLAG_5MHZ = BIT(29), - RX_FLAG_AMSDU_MORE = BIT(30), - RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), - RX_FLAG_MIC_STRIPPED = BIT_ULL(32), - RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), + RX_FLAG_NO_SIGNAL_VAL = BIT(8), + RX_FLAG_AMPDU_DETAILS = BIT(9), + RX_FLAG_PN_VALIDATED = BIT(10), + RX_FLAG_DUP_VALIDATED = BIT(11), + RX_FLAG_AMPDU_LAST_KNOWN = BIT(12), + RX_FLAG_AMPDU_IS_LAST = BIT(13), + RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14), + RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15), + RX_FLAG_MACTIME_END = BIT(16), + RX_FLAG_ONLY_MONITOR = BIT(17), + RX_FLAG_SKIP_MONITOR = BIT(18), + RX_FLAG_AMSDU_MORE = BIT(19), + RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(20), + RX_FLAG_MIC_STRIPPED = BIT(21), + RX_FLAG_ALLOW_SAME_PN = BIT(22), + RX_FLAG_ICV_STRIPPED = BIT(23), }; -#define RX_FLAG_STBC_SHIFT 26 - /** - * enum mac80211_rx_vht_flags - receive VHT flags + * enum mac80211_rx_encoding_flags - MCS & bandwidth flags * - * These flags are used with the @vht_flag member of - * &struct ieee80211_rx_status. - * @RX_VHT_FLAG_80MHZ: 80 MHz was used - * @RX_VHT_FLAG_160MHZ: 160 MHz was used - * @RX_VHT_FLAG_BF: packet was beamformed - */ + * @RX_ENC_FLAG_SHORTPRE: Short preamble was used for this frame + * @RX_ENC_FLAG_SHORT_GI: Short guard interval was used + * @RX_ENC_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, + * if the driver fills this value it should add + * %IEEE80211_RADIOTAP_MCS_HAVE_FMT + * to hw.radiotap_mcs_details to advertise that fact + * @RX_ENC_FLAG_LDPC: LDPC was used + * @RX_ENC_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3 + * @RX_ENC_FLAG_BF: packet was beamformed + */ +enum mac80211_rx_encoding_flags { + RX_ENC_FLAG_SHORTPRE = BIT(0), + RX_ENC_FLAG_SHORT_GI = BIT(2), + RX_ENC_FLAG_HT_GF = BIT(3), + RX_ENC_FLAG_STBC_MASK = BIT(4) | BIT(5), + RX_ENC_FLAG_LDPC = BIT(6), + RX_ENC_FLAG_BF = BIT(7), +}; -enum mac80211_rx_vht_flags { - RX_VHT_FLAG_80MHZ = BIT(0), - RX_VHT_FLAG_160MHZ = BIT(1), - RX_VHT_FLAG_BF = BIT(2), +#define RX_ENC_FLAG_STBC_SHIFT 4 + +enum mac80211_rx_encoding { + RX_ENC_LEGACY = 0, + RX_ENC_HT, + RX_ENC_VHT, }; /** @@ -1171,9 +1194,11 @@ enum mac80211_rx_vht_flags { * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) - * @vht_nss: number of streams (VHT only) + * @nss: number of streams (VHT and HE only) * @flag: %RX_FLAG_\* - * @vht_flag: %RX_VHT_FLAG_\* + * @encoding: &enum mac80211_rx_encoding + * @bw: &enum rate_info_bw + * @enc_flags: uses bits from &enum mac80211_rx_encoding_flags * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1184,11 +1209,12 @@ struct ieee80211_rx_status { u64 boottime_ns; u32 device_timestamp; u32 ampdu_reference; - u64 flag; + u32 flag; u16 freq; - u8 vht_flag; + u8 enc_flags; + u8 encoding:2, bw:3; u8 rate_idx; - u8 vht_nss; + u8 nss; u8 rx_flags; u8 band; u8 antenna; @@ -1766,15 +1792,6 @@ struct ieee80211_sta_rates { * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. - * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes. This - * field is always valid for packets with a VHT preamble. For packets - * with a HT preamble, additional limits apply: - * + If the skb is transmitted as part of a BA agreement, the - * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. - * + If the skb is not part of a BA aggreement, the A-MSDU maximal - * size is min(max_amsdu_len, 7935) bytes. - * Both additional HT limits must be enforced by the low level driver. - * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) @@ -1797,6 +1814,22 @@ struct ieee80211_sta { bool tdls_initiator; bool mfp; u8 max_amsdu_subframes; + + /** + * @max_amsdu_len: + * indicates the maximal length of an A-MSDU in bytes. + * This field is always valid for packets with a VHT preamble. + * For packets with a HT preamble, additional limits apply: + * + * * If the skb is transmitted as part of a BA agreement, the + * A-MSDU maximal size is min(max_amsdu_len, 4065) bytes. + * * If the skb is not part of a BA aggreement, the A-MSDU maximal + * size is min(max_amsdu_len, 7935) bytes. + * + * Both additional HT limits must be enforced by the low level + * driver. This is defined by the spec (IEEE 802.11-2012 section + * 8.3.2.2 NOTE 2). + */ u16 max_amsdu_len; bool support_p2p_ps; u16 max_rc_amsdu_len; @@ -3201,26 +3234,6 @@ enum ieee80211_reconfig_type { * Returns non-zero if this device sent the last beacon. * The callback can sleep. * - * @ampdu_action: Perform a certain A-MPDU action - * The RA/TID combination determines the destination and TID we want - * the ampdu action to be performed for. The action is defined through - * ieee80211_ampdu_mlme_action. - * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver - * may neither send aggregates containing more subframes than @buf_size - * nor send aggregates in a way that lost frames would exceed the - * buffer size. If just limiting the aggregate size, this would be - * possible with a buf_size of 8: - * - TX: 1.....7 - * - RX: 2....7 (lost frame #1) - * - TX: 8..1... - * which is invalid since #1 was now re-transmitted well past the - * buffer size of 8. Correct ways to retransmit #1 would be: - * - TX: 1 or 18 or 81 - * Even "189" would be wrong since 1 could be lost again. - * - * Returns a negative error code on failure. - * The callback can sleep. - * * @get_survey: Return per-channel survey information * * @rfkill_poll: Poll rfkill hardware state. If you need this, you also @@ -3403,7 +3416,7 @@ enum ieee80211_reconfig_type { * since there won't be any time to beacon before the switch anyway. * @pre_channel_switch: This is an optional callback that is called * before a channel switch procedure is started (ie. when a STA - * gets a CSA or an userspace initiated channel-switch), allowing + * gets a CSA or a userspace initiated channel-switch), allowing * the driver to prepare for the channel switch. * @post_channel_switch: This is an optional callback that is called * after a channel switch procedure is completed, allowing the @@ -3573,6 +3586,35 @@ struct ieee80211_ops { s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); + + /** + * @ampdu_action: + * Perform a certain A-MPDU action. + * The RA/TID combination determines the destination and TID we want + * the ampdu action to be performed for. The action is defined through + * ieee80211_ampdu_mlme_action. + * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver + * may neither send aggregates containing more subframes than @buf_size + * nor send aggregates in a way that lost frames would exceed the + * buffer size. If just limiting the aggregate size, this would be + * possible with a buf_size of 8: + * + * - ``TX: 1.....7`` + * - ``RX: 2....7`` (lost frame #1) + * - ``TX: 8..1...`` + * + * which is invalid since #1 was now re-transmitted well past the + * buffer size of 8. Correct ways to retransmit #1 would be: + * + * - ``TX: 1 or`` + * - ``TX: 18 or`` + * - ``TX: 81`` + * + * Even ``189`` would be wrong since 1 could be lost again. + * + * Returns a negative error code on failure. + * The callback can sleep. + */ int (*ampdu_action)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params); @@ -4163,6 +4205,22 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates); /** + * ieee80211_sta_set_expected_throughput - set the expected tpt for a station + * + * Call this function to notify mac80211 about a change in expected throughput + * to a station. A driver for a device that does rate control in firmware can + * call this function when the expected throughput estimate towards a station + * changes. The information is used to tune the CoDel AQM applied to traffic + * going towards that station (which can otherwise be too aggressive and cause + * slow stations to starve). + * + * @pubsta: the station to set throughput for. + * @thr: the current expected throughput in kbps. + */ +void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, + u32 thr); + +/** * ieee80211_tx_status - transmit status callback * * Call this function for all transmitted frames after they have been @@ -4182,6 +4240,23 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb); /** + * ieee80211_tx_status_ext - extended transmit status callback + * + * This function can be used as a replacement for ieee80211_tx_status + * in drivers that may want to provide extra information that does not + * fit into &struct ieee80211_tx_info. + * + * Calls to this function for a single hardware must be synchronized + * against each other. Calls to this function, ieee80211_tx_status_ni() + * and ieee80211_tx_status_irqsafe() may not be mixed for a single hardware. + * + * @hw: the hardware the frame was transmitted by + * @status: tx status information + */ +void ieee80211_tx_status_ext(struct ieee80211_hw *hw, + struct ieee80211_tx_status *status); + +/** * ieee80211_tx_status_noskb - transmit status callback without skb * * This function can be used as a replacement for ieee80211_tx_status @@ -4197,9 +4272,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, * (NULL for multicast packets) * @info: tx status information */ -void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, - struct ieee80211_tx_info *info); +static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, + struct ieee80211_tx_info *info) +{ + struct ieee80211_tx_status status = { + .sta = sta, + .info = info, + }; + + ieee80211_tx_status_ext(hw, &status); +} /** * ieee80211_tx_status_ni - transmit status callback (in process context) @@ -5260,6 +5343,7 @@ void ieee80211_resume_disconnect(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @rssi_event: the RSSI trigger event type + * @rssi_level: new RSSI level value or 0 if not available * @gfp: context flags * * When the %IEEE80211_VIF_SUPPORTS_CQM_RSSI is set, and a connection quality @@ -5268,6 +5352,7 @@ void ieee80211_resume_disconnect(struct ieee80211_vif *vif); */ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level, gfp_t gfp); /** @@ -5367,6 +5452,9 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, + unsigned int bit); + /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session * @@ -5381,8 +5469,13 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); * @addr: station mac address * @tid: the rx tid */ -void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid); +} /** * ieee80211_stop_rx_ba_session_offl - stop a Rx BA session @@ -5398,8 +5491,13 @@ void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, * @addr: station mac address * @tid: the rx tid */ -void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); +} /* Rate control API */ @@ -5418,9 +5516,6 @@ void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, * RTS threshold * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it - * @max_rate_idx: user-requested maximum (legacy) rate - * (deprecated; this will be removed once drivers get updated to use - * rate_idx_mask) * @rate_idx_mask: user-requested (legacy) rate mask * @rate_idx_mcs_mask: user-requested MCS rate mask (NULL if not in use) * @bss: whether this frame is sent out in AP or IBSS mode @@ -5432,7 +5527,6 @@ struct ieee80211_tx_rate_control { struct sk_buff *skb; struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; - u8 max_rate_idx; u32 rate_idx_mask; u8 *rate_idx_mcs_mask; bool bss; @@ -5454,10 +5548,9 @@ struct rate_control_ops { void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); - void (*tx_status_noskb)(void *priv, - struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct ieee80211_tx_info *info); + void (*tx_status_ext)(void *priv, + struct ieee80211_supported_band *sband, + void *priv_sta, struct ieee80211_tx_status *st); void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb); diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 179253f9dcfd..9d22bf67ac86 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -14,11 +14,12 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 -#define MAX_NEW_LABELS 2 - struct mpls_iptunnel_encap { - u32 label[MAX_NEW_LABELS]; u8 labels; + u8 ttl_propagate; + u8 default_ttl; + u8 reserved1; + u32 label[0]; }; static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d562a2fe4860..31b1bb11ba3f 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -384,13 +384,30 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); return n; } +static inline void __ipv6_confirm_neigh(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(dev, pkey); + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + } + rcu_read_unlock_bh(); +} + int ndisc_init(void); int ndisc_late_init(void); @@ -422,8 +439,10 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh, * IGMP */ int igmp6_init(void); +int igmp6_late_init(void); void igmp6_cleanup(void); +void igmp6_late_cleanup(void); int igmp6_event_query(struct sk_buff *skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 8b683841e574..9816df225af3 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -17,6 +17,7 @@ */ #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> @@ -76,7 +77,7 @@ struct neigh_parms { void *sysctl_table; int dead; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; @@ -137,7 +138,7 @@ struct neighbour { unsigned long confirmed; unsigned long updated; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; struct timer_list timer; @@ -155,7 +156,7 @@ struct neighbour { struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; -}; +} __randomize_layout; struct neigh_ops { int family; @@ -314,8 +315,10 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, } void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); -int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags); +int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, + u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); @@ -393,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { - atomic_dec(&parms->refcnt); + refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { - atomic_inc(&parms->refcnt); + refcount_inc(&parms->refcnt); return parms; } @@ -408,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) static inline void neigh_release(struct neighbour *neigh) { - if (atomic_dec_and_test(&neigh->refcnt)) + if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); return neigh; } -#define neigh_hold(n) atomic_inc(&(n)->refcnt) +#define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { @@ -449,7 +452,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; - int hh_len; + unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); @@ -458,7 +461,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } else { - int hh_alen = HH_DATA_ALIGN(hh_len); + unsigned int hh_alen = HH_DATA_ALIGN(hh_len); memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } @@ -468,6 +471,16 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb return dev_queue_xmit(skb); } +static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) +{ + const struct hh_cache *hh = &n->hh; + + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); +} + static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index af8fe8a909dc..1c401bd4c2e0 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -5,6 +5,7 @@ #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> @@ -27,6 +28,7 @@ #include <net/netns/nftables.h> #include <net/netns/xfrm.h> #include <net/netns/mpls.h> +#include <net/netns/can.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -45,7 +47,7 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - atomic_t passive; /* To decided when the network + refcount_t passive; /* To decided when the network * namespace should be freed. */ atomic_t count; /* To decided when the network @@ -141,9 +143,12 @@ struct net { #if IS_ENABLED(CONFIG_MPLS) struct netns_mpls mpls; #endif +#if IS_ENABLED(CONFIG_CAN) + struct netns_can can; +#endif struct sock *diag_nlsk; atomic_t fnhe_genid; -}; +} __randomize_layout; #include <linux/seq_file_net.h> @@ -154,6 +159,7 @@ extern struct net init_net; struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_barrier(void); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@ -164,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags, return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_barrier(void) {} #endif /* CONFIG_NET_NS */ diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 0b0c35c37125..925524ede6c8 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); + refcount_set(&(skb->nf_bridge->use), 1); return skb->nf_bridge; } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5916aa9ab3f0..48407569585d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -34,6 +34,7 @@ union nf_conntrack_proto { struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct nf_ct_gre gre; + unsigned int tmpl_padto; }; union nf_conntrack_expect_proto { @@ -49,25 +50,6 @@ union nf_conntrack_expect_proto { #define NF_CT_ASSERT(x) #endif -struct nf_conntrack_helper; - -/* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 4 - -/* nf_conn feature for connections that have a helper */ -struct nf_conn_help { - /* Helper. if any */ - struct nf_conntrack_helper __rcu *helper; - - struct hlist_head expectations; - - /* Current number of expected connections */ - u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; - - /* private helper information. */ - char data[]; -}; - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> @@ -75,7 +57,7 @@ struct nf_conn { /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * - * Hint, SKB address this struct and refcnt via skb->nfct and + * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, * beware nf_ct_get() is different and don't inc refcnt. @@ -162,12 +144,16 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); +#define NFCT_INFOMASK 7UL +#define NFCT_PTRMASK ~(NFCT_INFOMASK) + /* Return conntrack_info and tuple hash for given skb. */ static inline struct nf_conn * nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - *ctinfo = skb->nfctinfo; - return (struct nf_conn *)skb->nfct; + *ctinfo = skb->_nfct & NFCT_INFOMASK; + + return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK); } /* decrement reference count on a conntrack */ @@ -238,18 +224,14 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); -/* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked); -static inline struct nf_conn *nf_ct_untracked_get(void) -{ - return raw_cpu_ptr(&nf_conntrack_untracked); -} -void nf_ct_untracked_status_or(unsigned long bits); - /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report); + +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); struct nf_conntrack_zone; @@ -276,11 +258,6 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct) return test_bit(IPS_DYING_BIT, &ct->status); } -static inline int nf_ct_is_untracked(const struct nf_conn *ct) -{ - return test_bit(IPS_UNTRACKED_BIT, &ct->status); -} - /* Packet is received from loopback */ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) { @@ -341,6 +318,12 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +static inline void +nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) +{ + skb->_nfct = (unsigned long)ct | info; +} + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 62e17d1319ff..81d7f8a30945 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -62,10 +62,10 @@ int __nf_conntrack_confirm(struct sk_buff *skb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int nf_conntrack_confirm(struct sk_buff *skb) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { if (!nf_ct_is_confirmed(ct)) ret = __nf_conntrack_confirm(skb); if (likely(ret == NF_ACCEPT)) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 12d967b58726..2a10c6570fcc 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -20,11 +20,11 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ + u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ + enum nf_ct_ecache_state state:8;/* ecache state */ u32 portid; /* netlink portid of destroyer */ - enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 5ed33ea4718e..2ba54feaccd8 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -5,6 +5,8 @@ #ifndef _NF_CONNTRACK_EXPECT_H #define _NF_CONNTRACK_EXPECT_H +#include <linux/refcount.h> + #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -37,7 +39,7 @@ struct nf_conntrack_expect { struct timer_list timeout; /* Usage count. */ - atomic_t use; + refcount_t use; /* Flags */ unsigned int flags; @@ -71,6 +73,7 @@ struct nf_conntrack_expect_policy { }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 +#define NF_CT_EXPECT_MAX_CNT 255 int nf_conntrack_expect_pernet_init(struct net *net); void nf_conntrack_expect_pernet_fini(struct net *net); @@ -102,6 +105,7 @@ static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 1c3035dda31f..4944bc9153cf 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -43,8 +43,8 @@ enum nf_ct_ext_id { /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { struct rcu_head rcu; - u16 offset[NF_CT_EXT_NUM]; - u16 len; + u8 offset[NF_CT_EXT_NUM]; + u8 len; char data[0]; }; @@ -69,12 +69,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) /* Destroy all relationships */ -void __nf_ct_ext_destroy(struct nf_conn *ct); -static inline void nf_ct_ext_destroy(struct nf_conn *ct) -{ - if (ct->ext) - __nf_ct_ext_destroy(ct); -} +void nf_ct_ext_destroy(struct nf_conn *ct); /* Free operation. If you want to free a object referred from private area, * please implement __nf_ct_ext_free() and call it. @@ -86,15 +81,7 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp); - -#define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) -#define nf_ct_ext_add_length(ct, id, len, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) - -#define NF_CT_EXT_F_PREALLOC 0x0001 +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ @@ -102,15 +89,11 @@ struct nf_ct_ext_type { enum nf_ct_ext_id id; - unsigned int flags; - /* Length and min alignment. */ u8 len; u8 align; - /* initial size of nf_ct_ext. */ - u8 alloc_size; }; -int nf_ct_extend_register(struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(struct nf_ct_ext_type *type); +int nf_ct_extend_register(const struct nf_ct_ext_type *type); +void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1eaac1f4cd6a..c519bb5b5bb8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -9,6 +9,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H +#include <linux/refcount.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_expect.h> @@ -26,12 +27,10 @@ struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; - /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ - size_t data_len; - /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -49,20 +48,46 @@ struct nf_conntrack_helper { unsigned int expect_class_max; unsigned int flags; - unsigned int queue_num; /* For user-space helpers. */ + + /* For user-space helpers: */ + unsigned int queue_num; + /* length of userspace private data stored in nf_conn_help->data */ + u16 data_len; +}; + +/* Must be kept in sync with the classes defined by helpers */ +#define NF_CT_MAX_EXPECT_CLASSES 4 + +/* nf_conn feature for connections that have a helper */ +struct nf_conn_help { + /* Helper. if any */ + struct nf_conntrack_helper __rcu *helper; + + struct hlist_head expectations; + + /* Current number of expected connections */ + u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[32] __aligned(8); }; +#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \ + BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data)) + struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, - u32 expect_class_max, u32 data_len, + u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e01559b4d781..6d14b36e3a49 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -71,7 +71,7 @@ struct nf_conntrack_l3proto { struct module *me; }; -extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ @@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * __nf_ct_l3proto_find(u_int16_t l3proto) { - if (unlikely(l3proto >= AF_MAX)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO)) return &nf_conntrack_l3proto_generic; return rcu_dereference(nf_ct_l3protos[l3proto]); } diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e7b836590f0b..7032e044bbe2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -55,9 +55,12 @@ struct nf_conntrack_l4proto { void (*destroy)(struct nf_conn *ct); int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, + unsigned int dataoff, u_int8_t pf, unsigned int hooknum); + /* called by gc worker if table is full */ + bool (*can_early_drop)(const struct nf_conn *ct); + /* Print out the per-protocol part of the tuple. Return like seq_* */ void (*print_tuple)(struct seq_file *s, const struct nf_conntrack_tuple *); diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index b0ca402c1f72..a2fcb5271726 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -52,6 +52,8 @@ struct synproxy_stats { struct synproxy_net { struct nf_conn *tmpl; struct synproxy_stats __percpu *stats; + unsigned int hook_ref4; + unsigned int hook_ref6; }; extern unsigned int synproxy_net_id; diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 5cc5e9e6171a..d40b89355fdd 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -4,6 +4,7 @@ #include <net/net_namespace.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> +#include <linux/refcount.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> @@ -12,7 +13,7 @@ struct ctnl_timeout { struct list_head head; struct rcu_head rcu_head; - atomic_t refcnt; + refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; struct nf_conntrack_l4proto *l4proto; diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 450f87f95415..42e0696f38d8 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -51,6 +51,9 @@ struct nf_logger { struct module *me; }; +/* sysctl_nf_log_all_netns - allow LOG target in all network namespaces */ +extern int sysctl_nf_log_all_netns; + /* Function to register/unregister log function. */ int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c327a431a6f3..05c82a1a4267 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -67,7 +67,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, { #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) - return nat->masq_index && hooknum == NF_INET_POST_ROUTING && + return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; #else diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 01bcc6bfbcc9..fbfa5acf4f14 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,31 +7,31 @@ struct sk_buff; /* These return true or false. */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len, bool adjust); +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len, bool adjust); -static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +static inline bool nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) { return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, match_offset, match_len, rep_buffer, rep_len, true); } -int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len); +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len); /* Setup NAT on this expected conntrack so it follows master, but goes * to port ct->master->saved_proto. */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 09948d10e38e..4454719ff849 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,8 +24,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - const struct nf_hook_entry *hooks); + unsigned int (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7dfdb517f0be..bd5be0d691d5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -103,6 +103,35 @@ struct nft_regs { }; }; +/* Store/load an u16 or u8 integer to/from the u32 data register. + * + * Note, when using concatenations, register allocation happens at 32-bit + * level. So for store instruction, pad the rest part with zero to avoid + * garbage values. + */ + +static inline void nft_reg_store16(u32 *dreg, u16 val) +{ + *dreg = 0; + *(u16 *)dreg = val; +} + +static inline void nft_reg_store8(u32 *dreg, u8 val) +{ + *dreg = 0; + *(u8 *)dreg = val; +} + +static inline u16 nft_reg_load16(u32 *sreg) +{ + return *(u16 *)sreg; +} + +static inline u8 nft_reg_load8(u32 *sreg) +{ + return *(u8 *)sreg; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { @@ -147,7 +176,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -243,11 +272,30 @@ enum nft_set_class { * characteristics * * @size: required memory - * @class: lookup performance class + * @lookup: lookup performance class + * @space: memory class */ struct nft_set_estimate { unsigned int size; - enum nft_set_class class; + enum nft_set_class lookup; + enum nft_set_class space; +}; + +/** + * struct nft_set_type - nf_tables set type + * + * @select_ops: function to select nft_set_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @owner: module reference + */ +struct nft_set_type { + const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, + const struct nft_set_desc *desc, + u32 flags); + const struct nft_set_ops *ops; + struct list_head list; + struct module *owner; }; struct nft_set_ext; @@ -260,14 +308,12 @@ struct nft_expr; * @insert: insert new element into set * @activate: activate new element in the next generation * @deactivate: lookup for element and deactivate it in the next generation - * @deactivate_one: deactivate element in the next generation + * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance - * @list: nf_tables_set_ops list node - * @owner: module reference * @elemsize: element private size * @features: features supported by the implementation */ @@ -295,16 +341,18 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*deactivate_one)(const struct net *net, - const struct nft_set *set, - void *priv); - void (*remove)(const struct nft_set *set, + bool (*flush)(const struct net *net, + const struct nft_set *set, + void *priv); + void (*remove)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); - unsigned int (*privsize)(const struct nlattr * const nla[]); + unsigned int (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); @@ -313,14 +361,13 @@ struct nft_set_ops { const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); - struct list_head list; - struct module *owner; unsigned int elemsize; u32 features; + const struct nft_set_type *type; }; -int nft_register_set(struct nft_set_ops *ops); -void nft_unregister_set(struct nft_set_ops *ops); +int nft_register_set(struct nft_set_type *type); +void nft_unregister_set(struct nft_set_type *type); /** * struct nft_set - nf_tables set instance @@ -381,10 +428,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv) return (void *)priv - offsetof(struct nft_set, data); } -struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla, u8 genmask); -struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla, u8 genmask); +struct nft_set *nft_set_lookup(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -878,6 +926,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +static inline bool nft_is_base_chain(const struct nft_chain *chain) +{ + return chain->flags & NFT_BASE_CHAIN; +} + int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); @@ -984,9 +1037,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type @@ -1012,7 +1065,8 @@ struct nft_object_type { unsigned int maxattr; struct module *owner; const struct nla_policy *policy; - int (*init)(const struct nlattr * const tb[], + int (*init)(const struct nft_ctx *ctx, + const struct nlattr *const tb[], struct nft_object *obj); void (*destroy)(struct nft_object *obj); int (*dump)(struct sk_buff *skb, @@ -1198,10 +1252,13 @@ struct nft_trans { struct nft_trans_rule { struct nft_rule *rule; + u32 rule_id; }; #define nft_trans_rule(trans) \ (((struct nft_trans_rule *)trans->data)->rule) +#define nft_trans_rule_id(trans) \ + (((struct nft_trans_rule *)trans->data)->rule_id) struct nft_trans_set { struct nft_set *set; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d150b5066201..97983d1c05e4 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { + unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); return; @@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; @@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, if (pkt_len + sizeof(*ip6h) > skb->len) return -1; - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) return -1; diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 5ceb2205e4e3..381af9469e6a 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -32,6 +32,6 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index); #endif diff --git a/include/net/netlabel.h b/include/net/netlabel.h index efe98068880f..72d6435fc16c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -37,7 +37,7 @@ #include <linux/in6.h> #include <net/netlink.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct cipso_v4_doi; struct calipso_doi; @@ -136,7 +136,7 @@ struct netlbl_audit { * */ struct netlbl_lsm_cache { - atomic_t refcount; + refcount_t refcount; void (*free) (const void *data); void *data; }; @@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) cache = kzalloc(sizeof(*cache), flags); if (cache) - atomic_set(&cache->refcount, 1); + refcount_set(&cache->refcount, 1); return cache; } @@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) */ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) { - if (!atomic_dec_and_test(&cache->refcount)) + if (!refcount_dec_and_test(&cache->refcount)) return; if (cache->free) diff --git a/include/net/netlink.h b/include/net/netlink.h index d3938f11ae52..ef8e6c3a80a6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -98,8 +98,8 @@ * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb - * nla_put_u64_64bits(skb, type, - * value, padattr) add u64 attribute to skb + * nla_put_u64_64bit(skb, type, + * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb @@ -229,17 +229,21 @@ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; + bool skip_notify; }; int netlink_rcv_skb(struct sk_buff *skb, - int (*cb)(struct sk_buff *, struct nlmsghdr *)); + int (*cb)(struct sk_buff *, struct nlmsghdr *, + struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy); + const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy); + int len, const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -373,18 +377,20 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct * * See nla_parse() */ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy); + nlmsg_attrlen(nlh, hdrlen), policy, extack); } /** @@ -408,16 +414,19 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct */ static inline int nlmsg_validate(const struct nlmsghdr *nlh, int hdrlen, int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; return nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), maxtype, policy); + nlmsg_attrlen(nlh, hdrlen), maxtype, policy, + extack); } /** @@ -738,14 +747,17 @@ nla_find_nested(const struct nlattr *nla, int attrtype) * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy + * @extack: extended ACK report struct * * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nlattr *nla, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + extack); } /** @@ -1251,6 +1263,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be @@ -1259,9 +1272,11 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * Returns 0 on success or a negative error code. */ static inline int nla_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_validate(nla_data(start), nla_len(start), maxtype, policy); + return nla_validate(nla_data(start), nla_len(start), maxtype, policy, + extack); } /** diff --git a/include/net/netns/can.h b/include/net/netns/can.h new file mode 100644 index 000000000000..b106e6ae2e5b --- /dev/null +++ b/include/net/netns/can.h @@ -0,0 +1,40 @@ +/* + * can in net namespaces + */ + +#ifndef __NETNS_CAN_H__ +#define __NETNS_CAN_H__ + +#include <linux/spinlock.h> + +struct dev_rcv_lists; +struct s_stats; +struct s_pstats; + +struct netns_can { +#if IS_ENABLED(CONFIG_PROC_FS) + struct proc_dir_entry *proc_dir; + struct proc_dir_entry *pde_version; + struct proc_dir_entry *pde_stats; + struct proc_dir_entry *pde_reset_stats; + struct proc_dir_entry *pde_rcvlist_all; + struct proc_dir_entry *pde_rcvlist_fil; + struct proc_dir_entry *pde_rcvlist_inv; + struct proc_dir_entry *pde_rcvlist_sff; + struct proc_dir_entry *pde_rcvlist_eff; + struct proc_dir_entry *pde_rcvlist_err; + struct proc_dir_entry *bcmproc_dir; +#endif + + /* receive filters subscribed for 'all' CAN devices */ + struct dev_rcv_lists *can_rx_alldev_list; + spinlock_t can_rcvlists_lock; + struct timer_list can_stattimer;/* timer for statistics update */ + struct s_stats *can_stats; /* packet statistics */ + struct s_pstats *can_pstats; /* receive list statistics */ + + /* CAN GW per-net gateway jobs */ + struct hlist_head cgw_list; +}; + +#endif /* __NETNS_CAN_H__ */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index cf799fc3fdec..17724c62de97 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,19 +69,6 @@ struct nf_sctp_net { }; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - -struct nf_udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; -#endif - struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -94,9 +81,6 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct nf_udplite_net udplite; -#endif }; struct ct_pcpu { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0378e88f6fd3..9a14a0850b0e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -27,6 +27,15 @@ struct ping_group_range { kgid_t range[2]; }; +struct inet_hashinfo; + +struct inet_timewait_death_row { + atomic_t tw_count; + + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; + int sysctl_max_tw_buckets; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -86,6 +95,8 @@ struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; + int sysctl_tcp_early_demux; + int sysctl_udp_early_demux; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; @@ -111,6 +122,15 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; + int sysctl_tcp_sack; + int sysctl_tcp_window_scaling; + int sysctl_tcp_timestamps; + struct inet_timewait_death_row tcp_death_row; + int sysctl_max_syn_backlog; + +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_udp_l3mdev_accept; +#endif int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; @@ -123,6 +143,7 @@ struct netns_ipv4 { #ifdef CONFIG_SYSCTL unsigned long *sysctl_local_reserved_ports; + int sysctl_ip_prot_sock; #endif #ifdef CONFIG_IP_MROUTE @@ -135,6 +156,7 @@ struct netns_ipv4 { #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH int sysctl_fib_multipath_use_neigh; + int sysctl_fib_multipath_hash_policy; #endif unsigned int fib_seq; /* protected by rtnl_mutex */ diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index d29203651c01..6608b3693385 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -9,8 +9,11 @@ struct mpls_route; struct ctl_table_header; struct netns_mpls { + int ip_ttl_propagate; + int default_ttl; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; + struct ctl_table_header *ctl; }; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index c501d67172b1..b7871d018354 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -118,6 +118,9 @@ struct netns_sctp { /* Flag to indicate if PR-SCTP is enabled. */ int prsctp_enable; + /* Flag to indicate if PR-CONFIG is enabled. */ + int reconf_enable; + /* Flag to idicate if SCTP-AUTH is enabled */ int auth_enable; diff --git a/include/net/netrom.h b/include/net/netrom.h index 110350aca3df..443a4ffca7aa 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/refcount.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 @@ -93,7 +94,7 @@ struct nr_neigh { unsigned short count; unsigned int number; unsigned char failed; - atomic_t refcount; + refcount_t refcount; }; struct nr_route { @@ -109,7 +110,7 @@ struct nr_node { unsigned char which; unsigned char count; struct nr_route routes[3]; - atomic_t refcount; + refcount_t refcount; spinlock_t node_lock; }; @@ -118,21 +119,21 @@ struct nr_node { *********************************************************************/ #define nr_node_hold(__nr_node) \ - atomic_inc(&((__nr_node)->refcount)) + refcount_inc(&((__nr_node)->refcount)) static __inline__ void nr_node_put(struct nr_node *nr_node) { - if (atomic_dec_and_test(&nr_node->refcount)) { + if (refcount_dec_and_test(&nr_node->refcount)) { kfree(nr_node); } } #define nr_neigh_hold(__nr_neigh) \ - atomic_inc(&((__nr_neigh)->refcount)) + refcount_inc(&((__nr_neigh)->refcount)) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { - if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (refcount_dec_and_test(&nr_neigh->refcount)) { if (nr_neigh->ax25) ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 1a3de8b34ad2..bbdc73a3239d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/skbuff.h> +#define nfc_dbg(dev, fmt, ...) dev_dbg((dev), "NFC: " fmt, ##__VA_ARGS__) #define nfc_info(dev, fmt, ...) dev_info((dev), "NFC: " fmt, ##__VA_ARGS__) #define nfc_err(dev, fmt, ...) dev_err((dev), "NFC: " fmt, ##__VA_ARGS__) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f0a051480c6c..537d0a0ad4c4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,6 +17,35 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +#ifdef CONFIG_NET_CLS +struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create); +void tcf_chain_put(struct tcf_chain *chain); +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain); +void tcf_block_put(struct tcf_block *block); +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); + +#else +static inline +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain) +{ + return 0; +} + +static inline void tcf_block_put(struct tcf_block *block) +{ +} + +static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} +#endif + static inline unsigned long __cls_set_class(unsigned long *clp, unsigned long cl) { @@ -128,6 +157,25 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, #endif } +static inline void +tcf_exts_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 lastuse) +{ +#ifdef CONFIG_NET_CLS_ACT + int i; + + preempt_disable(); + + for (i = 0; i < exts->nr_actions; i++) { + struct tc_action *a = exts->actions[i]; + + tcf_action_stats_update(a, bytes, packets, lastuse); + } + + preempt_enable(); +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer @@ -473,6 +521,11 @@ static inline bool tc_flags_valid(u32 flags) return true; } +static inline bool tc_in_hw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false; +} + enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, @@ -481,6 +534,7 @@ enum tc_fl_command { struct tc_cls_flower_offload { enum tc_fl_command command; + u32 prio; unsigned long cookie; struct flow_dissector *dissector; struct fl_flow_key *mask; @@ -515,4 +569,12 @@ struct tc_cls_bpf_offload { u32 gen_flags; }; + +/* This structure holds cookie structure that is passed from user + * to the kernel for actions and classifiers + */ +struct tc_cookie { + u8 *data; + u32 len; +}; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f1b76b8e6d2d..2579c209ea51 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -92,7 +92,7 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); @@ -113,9 +113,6 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); - static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { /* We need to take extra care in case the skb came via diff --git a/include/net/protocol.h b/include/net/protocol.h index bf36ca34af7a..65ba335b0e7e 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -40,6 +40,7 @@ /* This is used to register protocols. */ struct net_protocol { void (*early_demux)(struct sk_buff *skb); + void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, @@ -54,7 +55,7 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { void (*early_demux)(struct sk_buff *skb); - + void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, @@ -92,12 +93,12 @@ struct inet_protosw { #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ -extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; +extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS]; extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS]; #if IS_ENABLED(CONFIG_IPV6) -extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; +extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; #endif int inet_add_protocol(const struct net_protocol *prot, unsigned char num); diff --git a/include/net/psample.h b/include/net/psample.h new file mode 100644 index 000000000000..8888b0e1a82e --- /dev/null +++ b/include/net/psample.h @@ -0,0 +1,36 @@ +#ifndef __NET_PSAMPLE_H +#define __NET_PSAMPLE_H + +#include <uapi/linux/psample.h> +#include <linux/module.h> +#include <linux/list.h> + +struct psample_group { + struct list_head list; + struct net *net; + u32 group_num; + u32 refcount; + u32 seq; +}; + +struct psample_group *psample_group_get(struct net *net, u32 group_num); +void psample_group_put(struct psample_group *group); + +#if IS_ENABLED(CONFIG_PSAMPLE) + +void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, + u32 trunc_size, int in_ifindex, int out_ifindex, + u32 sample_rate); + +#else + +static inline void psample_sample_packet(struct psample_group *group, + struct sk_buff *skb, u32 trunc_size, + int in_ifindex, int out_ifindex, + u32 sample_rate) +{ +} + +#endif + +#endif /* __NET_PSAMPLE_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 6ebe13eb1c4c..23e22054aa60 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -1,7 +1,7 @@ /* * NET Generic infrastructure for Network protocols. * - * Definitions for request_sock + * Definitions for request_sock * * Authors: Arnaldo Carvalho de Melo <[email protected]> * @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> +#include <linux/refcount.h> #include <net/sock.h> @@ -29,7 +30,7 @@ struct proto; struct request_sock_ops { int family; - int obj_size; + unsigned int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(const struct sock *sk, @@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; req->rsk_listener = NULL; if (attach_listener) { - if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { kmem_cache_free(ops->slab, req); return NULL; } @@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; - atomic_set(&req->rsk_refcnt, 0); + refcount_set(&req->rsk_refcnt, 0); return req; } @@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); if (req->rsk_listener) @@ -119,12 +120,10 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - if (atomic_dec_and_test(&req->rsk_refcnt)) + if (refcount_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } -extern int sysctl_max_syn_backlog; - /* * For a TCP Fast Open listener - * lock - protects the access to all the reqsk, which is co-owned by diff --git a/include/net/route.h b/include/net/route.h index c0874c87c173..cb0a76d9dde1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -113,13 +113,16 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, - int mp_hash); +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, + const struct sk_buff *skb); +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, + struct fib_result *res, + const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { - return __ip_route_output_key_hash(net, flp, -1); + return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, @@ -175,6 +178,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); +int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin, + struct fib_result *res); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) @@ -184,7 +190,9 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) - skb_dst_force(skb); + skb_dst_force_safe(skb); + if (!skb_dst(skb)) + err = -EINVAL; rcu_read_unlock(); return err; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 4113916cc1bb..abe6b733d473 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -4,7 +4,8 @@ #include <linux/rtnetlink.h> #include <net/netlink.h> -typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *); +typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, + struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); @@ -62,15 +63,18 @@ struct rtnl_link_ops { int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*newlink)(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*changelink)(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); void (*dellink)(struct net_device *dev, struct list_head *head); @@ -87,11 +91,13 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); size_t (*get_slave_size)(const struct net_device *dev, const struct net_device *slave_dev); int (*fill_slave_info)(struct sk_buff *skb, @@ -139,6 +145,10 @@ struct rtnl_af_ops { const struct nlattr *attr); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr); + + int (*fill_stats_af)(struct sk_buff *skb, + const struct net_device *dev); + size_t (*get_stats_af_size)(const struct net_device *dev); }; void __rtnl_af_unregister(struct rtnl_af_ops *ops); @@ -154,7 +164,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len); +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, + struct netlink_ext_ack *exterr); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 498f81b229a4..1c123e2b2415 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -8,6 +8,8 @@ #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> +#include <linux/list.h> +#include <linux/refcount.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> @@ -66,6 +68,7 @@ struct Qdisc { #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. */ +#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; @@ -93,7 +96,7 @@ struct Qdisc { struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; - atomic_t refcnt; + refcount_t refcnt; spinlock_t busylock ____cacheline_aligned_in_smp; }; @@ -152,7 +155,7 @@ struct Qdisc_class_ops { void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); @@ -191,8 +194,13 @@ struct Qdisc_ops { struct tcf_result { - unsigned long class; - u32 classid; + union { + struct { + unsigned long class; + u32 classid; + }; + const struct tcf_proto *goto_tp; + }; }; struct tcf_proto_ops { @@ -203,14 +211,14 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - bool (*destroy)(struct tcf_proto*, bool); + void (*destroy)(struct tcf_proto*); unsigned long (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, unsigned long *, bool); - int (*delete)(struct tcf_proto*, unsigned long); + int (*delete)(struct tcf_proto*, unsigned long, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ @@ -235,6 +243,7 @@ struct tcf_proto { struct Qdisc *q; void *data; const struct tcf_proto_ops *ops; + struct tcf_chain *chain; struct rcu_head rcu; }; @@ -246,6 +255,19 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +struct tcf_chain { + struct tcf_proto __rcu *filter_chain; + struct tcf_proto __rcu **p_filter_chain; + struct list_head list; + struct tcf_block *block; + u32 index; /* chain index */ + unsigned int refcnt; +}; + +struct tcf_block { + struct list_head chain_list; +}; + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; @@ -405,19 +427,35 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); -bool tcf_destroy(struct tcf_proto *tp, bool force); -void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); +static inline void skb_reset_tc(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_redirected = 0; +#endif +} + static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT - return G_TC_AT(skb->tc_verd) & AT_INGRESS; + return skb->tc_at_ingress; #else return false; #endif } +static inline bool skb_skip_tc_classify(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + if (skb->tc_skip_classify) { + skb->tc_skip_classify = 0; + return true; + } +#endif + return false; +} + /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { diff --git a/include/net/scm.h b/include/net/scm.h index 59fa93c01d2a..142ea9e7a6d0 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include <linux/limits.h> #include <linux/net.h> +#include <linux/cred.h> #include <linux/security.h> #include <linux/pid.h> #include <linux/nsproxy.h> diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 9b9fb122b31f..e5c57d0a082d 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -31,6 +31,7 @@ #define __sctp_auth_h__ #include <linux/list.h> +#include <linux/refcount.h> struct sctp_endpoint; struct sctp_association; @@ -53,7 +54,7 @@ struct sctp_hmac { * over SCTP-AUTH */ struct sctp_auth_bytes { - atomic_t refcnt; + refcount_t refcnt; __u32 len; __u8 data[]; }; @@ -76,7 +77,7 @@ static inline void sctp_auth_key_hold(struct sctp_auth_bytes *key) if (!key) return; - atomic_inc(&key->refcnt); + refcount_inc(&key->refcnt); } void sctp_auth_key_put(struct sctp_auth_bytes *key); @@ -97,8 +98,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, __be16 hmac_id); -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc); -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc); +int sctp_auth_send_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); +int sctp_auth_recv_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sk_buff *skb, struct sctp_auth_chunk *auth, gfp_t gfp); diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4a20d00461c..d4679e7a5ed5 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -132,7 +132,7 @@ typedef union { struct sctp_association *asoc; struct sctp_transport *transport; struct sctp_bind_addr *bp; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; sctp_sackhdr_t *sackh; @@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) -SCTP_ARG_CONSTRUCTOR(PEER_INIT, sctp_init_chunk_t *, init) +SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5b847e49f7e9..9b18044c551e 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -60,11 +60,14 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; #define SCTP_NUM_PRSCTP_CHUNK_TYPES 1 +#define SCTP_NUM_RECONF_CHUNK_TYPES 1 + #define SCTP_NUM_AUTH_CHUNK_TYPES 1 #define SCTP_NUM_CHUNK_TYPES (SCTP_NUM_BASE_CHUNK_TYPES + \ SCTP_NUM_ADDIP_CHUNK_TYPES +\ SCTP_NUM_PRSCTP_CHUNK_TYPES +\ + SCTP_NUM_RECONF_CHUNK_TYPES +\ SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. */ @@ -90,6 +93,7 @@ typedef enum { SCTP_EVENT_TIMEOUT_T4_RTO, SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD, SCTP_EVENT_TIMEOUT_HEARTBEAT, + SCTP_EVENT_TIMEOUT_RECONF, SCTP_EVENT_TIMEOUT_SACK, SCTP_EVENT_TIMEOUT_AUTOCLOSE, } sctp_event_timeout_t; @@ -113,9 +117,10 @@ typedef enum { SCTP_PRIMITIVE_SEND, SCTP_PRIMITIVE_REQUESTHEARTBEAT, SCTP_PRIMITIVE_ASCONF, + SCTP_PRIMITIVE_RECONF, } sctp_event_primitive_t; -#define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_ASCONF +#define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF #define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1) /* We define here a utility type for manipulating subtypes. @@ -125,7 +130,7 @@ typedef enum { */ typedef union { - sctp_cid_t chunk; + enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; sctp_event_primitive_t primitive; @@ -136,7 +141,7 @@ static inline sctp_subtype_t \ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } -SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, sctp_cid_t, chunk) +SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) @@ -147,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) /* Calculate the actual data size in a data chunk */ #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - (unsigned long)(c->chunk_hdr)\ - - sizeof(sctp_data_chunk_t))) + - sizeof(struct sctp_data_chunk))) /* Internal error codes */ typedef enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d8833a86cd7e..980807d7506f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -141,6 +141,8 @@ int sctp_primitive_ABORT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_SEND(struct net *, struct sctp_association *, void *arg); int sctp_primitive_REQUESTHEARTBEAT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg); +int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, + void *arg); /* * sctp/input.c @@ -192,6 +194,15 @@ void sctp_remaddr_proc_exit(struct net *net); int sctp_offload_init(void); /* + * sctp/stream.c + */ +int sctp_send_reset_streams(struct sctp_association *asoc, + struct sctp_reset_streams *params); +int sctp_send_reset_assoc(struct sctp_association *asoc); +int sctp_send_add_streams(struct sctp_association *asoc, + struct sctp_add_streams *params); + +/* * Module global variables */ @@ -283,7 +294,6 @@ extern atomic_t sctp_dbg_objcnt_chunk; extern atomic_t sctp_dbg_objcnt_bind_addr; extern atomic_t sctp_dbg_objcnt_bind_bucket; extern atomic_t sctp_dbg_objcnt_addr; -extern atomic_t sctp_dbg_objcnt_ssnmap; extern atomic_t sctp_dbg_objcnt_datamsg; extern atomic_t sctp_dbg_objcnt_keys; @@ -438,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) { - - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } @@ -460,8 +469,10 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ - ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ @@ -469,7 +480,9 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ - sizeof(sctp_chunkhdr_t));\ + sizeof(struct sctp_chunkhdr));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) @@ -586,12 +599,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && !dst_check(t->dst, t->dst_cookie)) { - dst_release(t->dst); - t->dst = NULL; - } + if (t->dst && !dst_check(t->dst, t->dst_cookie)) + sctp_transport_dst_release(t); return t->dst; } +static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +{ + __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), + SCTP_DEFAULT_MINSEGMENT); + + if (t->pathmtu == pmtu) + return true; + + t->pathmtu = pmtu; + + return false; +} + #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index ca6c971dd74a..860f378333b5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -135,6 +135,7 @@ sctp_state_fn_t sctp_sf_do_8_5_1_E_sa; sctp_state_fn_t sctp_sf_cookie_echoed_err; sctp_state_fn_t sctp_sf_do_asconf; sctp_state_fn_t sctp_sf_do_asconf_ack; +sctp_state_fn_t sctp_sf_do_reconf; sctp_state_fn_t sctp_sf_do_9_2_reshutack; sctp_state_fn_t sctp_sf_eat_fwd_tsn; sctp_state_fn_t sctp_sf_eat_fwd_tsn_fast; @@ -157,6 +158,7 @@ sctp_state_fn_t sctp_sf_error_shutdown; sctp_state_fn_t sctp_sf_ignore_primitive; sctp_state_fn_t sctp_sf_do_prm_requestheartbeat; sctp_state_fn_t sctp_sf_do_prm_asconf; +sctp_state_fn_t sctp_sf_do_prm_reconf; /* Prototypes for other event state functions. */ sctp_state_fn_t sctp_sf_do_no_pending_tsn; @@ -167,6 +169,7 @@ sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort; /* Prototypes for timeout event state functions. */ sctp_state_fn_t sctp_sf_do_6_3_3_rtx; +sctp_state_fn_t sctp_sf_send_reconf; sctp_state_fn_t sctp_sf_do_6_2_sack; sctp_state_fn_t sctp_sf_autoclose_timer_expire; @@ -259,10 +262,54 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); - +struct sctp_chunk *sctp_make_strreset_req( + const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in); +struct sctp_chunk *sctp_make_strreset_tsnreq( + const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_strreset_addstrm( + const struct sctp_association *asoc, + __u16 out, __u16 in); +struct sctp_chunk *sctp_make_strreset_resp( + const struct sctp_association *asoc, + __u32 result, __u32 sn); +struct sctp_chunk *sctp_make_strreset_tsnresp( + struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, __u32 receiver_tsn); +bool sctp_verify_reconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, + struct sctp_paramhdr **errp); void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); +/* Prototypes for stream-processing functions. */ +struct sctp_chunk *sctp_process_strreset_outreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_inreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_tsnreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_addstrm_out( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_addstrm_in( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_resp( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); + /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, @@ -275,21 +322,20 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); void sctp_generate_heartbeat_event(unsigned long peer); +void sctp_generate_reconf_event(unsigned long peer); void sctp_generate_proto_unreach_event(unsigned long peer); -void sctp_ootb_pkt_free(struct sctp_packet *); +void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, - const struct sctp_association *, - struct sctp_chunk *, +struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); -int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, - struct sockaddr_storage*, int); /* 3rd level prototypes */ -__u32 sctp_generate_tag(const struct sctp_endpoint *); -__u32 sctp_generate_tsn(const struct sctp_endpoint *); +__u32 sctp_generate_tag(const struct sctp_endpoint *ep); +__u32 sctp_generate_tsn(const struct sctp_endpoint *ep); /* Extern declarations for major data structures. */ extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES]; @@ -301,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(sctp_data_chunk_t); + size -= sizeof(struct sctp_data_chunk); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 92daabdc007d..5ab29af8ca8a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -82,8 +82,8 @@ struct sctp_outq; struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; -struct sctp_ssnmap; struct crypto_shash; +struct sctp_stream; #include <net/sctp/tsnmap.h> @@ -310,9 +310,10 @@ struct sctp_cookie { __u32 adaptation_ind; - __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH]; + __u8 auth_random[sizeof(struct sctp_paramhdr) + + SCTP_AUTH_RANDOM_LENGTH]; __u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2]; - __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS]; + __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS]; /* This is a shim for my peer's INIT packet, followed by * a copy of the raw address list of the association. @@ -375,56 +376,26 @@ typedef struct sctp_sender_hb_info { union sctp_addr daddr; unsigned long sent_at; __u64 hb_nonce; -} __packed sctp_sender_hb_info_t; +} sctp_sender_hb_info_t; -/* - * RFC 2960 1.3.2 Sequenced Delivery within Streams - * - * The term "stream" is used in SCTP to refer to a sequence of user - * messages that are to be delivered to the upper-layer protocol in - * order with respect to other messages within the same stream. This is - * in contrast to its usage in TCP, where it refers to a sequence of - * bytes (in this document a byte is assumed to be eight bits). - * ... - * - * This is the structure we use to track both our outbound and inbound - * SSN, or Stream Sequence Numbers. - */ - -struct sctp_stream { - __u16 *ssn; - unsigned int len; -}; - -struct sctp_ssnmap { - struct sctp_stream in; - struct sctp_stream out; -}; - -struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - gfp_t gfp); -void sctp_ssnmap_free(struct sctp_ssnmap *map); -void sctp_ssnmap_clear(struct sctp_ssnmap *map); +int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + gfp_t gfp); +void sctp_stream_free(struct sctp_stream *stream); +void sctp_stream_clear(struct sctp_stream *stream); +void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); /* What is the current SSN number for this stream? */ -static inline __u16 sctp_ssn_peek(struct sctp_stream *stream, __u16 id) -{ - return stream->ssn[id]; -} +#define sctp_ssn_peek(stream, type, sid) \ + ((stream)->type[sid].ssn) /* Return the next SSN number for this stream. */ -static inline __u16 sctp_ssn_next(struct sctp_stream *stream, __u16 id) -{ - return stream->ssn[id]++; -} +#define sctp_ssn_next(stream, type, sid) \ + ((stream)->type[sid].ssn++) /* Skip over this ssn and all below. */ -static inline void sctp_ssn_skip(struct sctp_stream *stream, __u16 id, - __u16 ssn) -{ - stream->ssn[id] = ssn+1; -} - +#define sctp_ssn_skip(stream, type, sid, ssn) \ + ((stream)->type[sid].ssn = ssn + 1) + /* * Pointers to address related SCTP functions. * (i.e. things that depend on the address family.) @@ -509,7 +480,8 @@ struct sctp_pf { int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc); + struct sctp_association *asoc, + bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); @@ -524,7 +496,7 @@ struct sctp_datamsg { /* Chunks waiting to be submitted to lower layer. */ struct list_head chunks; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; /* Did the messenge fail to send? */ @@ -552,7 +524,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *); struct sctp_chunk { struct list_head list; - atomic_t refcnt; + refcount_t refcnt; /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; @@ -722,10 +694,9 @@ struct sctp_packet { ipfragok:1; /* So let ip fragment this packet */ }; -struct sctp_packet *sctp_packet_init(struct sctp_packet *, - struct sctp_transport *, - __u16 sport, __u16 dport); -struct sctp_packet *sctp_packet_config(struct sctp_packet *, __u32 vtag, int); +void sctp_packet_init(struct sctp_packet *, struct sctp_transport *, + __u16 sport, __u16 dport); +void sctp_packet_config(struct sctp_packet *, __u32 vtag, int); sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *, struct sctp_chunk *, int, gfp_t); sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *, @@ -764,7 +735,7 @@ struct sctp_transport { struct rhlist_head node; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -785,6 +756,8 @@ struct sctp_transport { /* Is the Path MTU update pending on this tranport */ pmtu_pending:1, + dst_pending_confirm:1, /* need to confirm neighbour */ + /* Has this transport moved the ctsn since we last sacked */ sack_generation:1; u32 dst_cookie; @@ -911,6 +884,9 @@ struct sctp_transport { /* Timer to handle ICMP proto unreachable envets */ struct timer_list proto_unreach_timer; + /* Timer to handler reconf chunk rtx */ + struct timer_list reconf_timer; + /* Since we're using per-destination retransmission timers * (see above), we're also using per-destination "transmitted" * queues. This probably ought to be a private struct @@ -969,6 +945,7 @@ void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk); void sctp_transport_free(struct sctp_transport *); void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); +void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -977,9 +954,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); +void sctp_transport_reset(struct sctp_transport *t); +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); +void sctp_transport_dst_release(struct sctp_transport *t); +void sctp_transport_dst_confirm(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into @@ -1195,7 +1174,7 @@ struct sctp_ep_common { * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. */ - atomic_t refcnt; + refcount_t refcnt; bool dead; /* What socket does this endpoint belong to? */ @@ -1285,7 +1264,10 @@ struct sctp_endpoint { struct list_head endpoint_shared_keys; __u16 active_key_id; __u8 auth_enable:1, - prsctp_enable:1; + prsctp_enable:1, + reconf_enable:1; + + __u8 strreset_enable; }; /* Recover the outter endpoint structure. */ @@ -1316,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr, int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - sctp_cid_t, sctp_init_chunk_t *peer_init, + enum sctp_cid cid, struct sctp_init_chunk *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, const union sctp_addr *peer, - sctp_init_chunk_t *init, gfp_t gfp); + struct sctp_init_chunk *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1332,6 +1314,27 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_out { + __u16 ssn; + __u8 state; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; +}; + +struct sctp_stream_in { + __u16 ssn; +}; + +struct sctp_stream { + struct sctp_stream_out *out; + struct sctp_stream_in *in; + __u16 outcnt; + __u16 incnt; +}; + +#define SCTP_STREAM_CLOSED 0x00 +#define SCTP_STREAM_OPEN 0x01 + /* SCTP_GET_ASSOC_STATS counters */ struct sctp_priv_assoc_stats { /* Maximum observed rto in the association during subsequent @@ -1519,6 +1522,7 @@ struct sctp_association { hostname_address:1, /* Peer understands DNS addresses? */ asconf_capable:1, /* Does peer support ADDIP? */ prsctp_capable:1, /* Can peer do PR-SCTP? */ + reconf_capable:1, /* Can peer do RE-CONFIG? */ auth_capable:1; /* Is peer doing SCTP-AUTH? */ /* sack_needed : This flag indicates if the next received @@ -1747,8 +1751,8 @@ struct sctp_association { /* Default receive parameters */ __u32 default_rcv_context; - /* This tracks outbound ssn for a given stream. */ - struct sctp_ssnmap *ssnmap; + /* Stream arrays */ + struct sctp_stream stream; /* All outbound chunks go through this structure. */ struct sctp_outq outqueue; @@ -1878,7 +1882,18 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ - prsctp_enable:1; + force_delay:1, + prsctp_enable:1, + reconf_enable:1; + + __u8 strreset_enable; + __u8 strreset_outstanding; /* request param count on the fly */ + + __u32 strreset_outseq; /* Update after receiving response */ + __u32 strreset_inseq; /* Update after receiving request */ + __u32 strreset_result[2]; /* save the results of last 2 responses */ + + struct sctp_chunk *strreset_chunk; /* save request chunk */ struct sctp_priv_assoc_stats stats; @@ -1939,12 +1954,12 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, const union sctp_addr *, const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); -void sctp_assoc_update(struct sctp_association *old, - struct sctp_association *new); +int sctp_assoc_update(struct sctp_association *old, + struct sctp_association *new); __u32 sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 2c098cd7e7e2..1060494ac230 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -128,6 +128,18 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey( struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( const struct sctp_association *asoc, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( + const struct sctp_association *asoc, __u16 flags, + __u16 stream_num, __u16 *stream_list, gfp_t gfp); + +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, + __u32 local_tsn, __u32 remote_tsn, gfp_t gfp); + +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 0caee631a836..031bf16d1521 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,13 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff); -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); +u32 secure_tcpv6_ts_off(const struct net *net, + const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/smc.h b/include/net/smc.h new file mode 100644 index 000000000000..12d26358ad9f --- /dev/null +++ b/include/net/smc.h @@ -0,0 +1,20 @@ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the SMC module (socket related) + * + * Copyright IBM Corp. 2016 + * + * Author(s): Ursula Braun <[email protected]> + */ +#ifndef _SMC_H +#define _SMC_H + +struct smc_hashinfo { + rwlock_t lock; + struct hlist_head ht; +}; + +int smc_hash_sk(struct sock *sk); +void smc_unhash_sk(struct sock *sk); +#endif /* _SMC_H */ diff --git a/include/net/sock.h b/include/net/sock.h index c4f5e6fca17c..7c0632c7e870 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,10 +66,12 @@ #include <linux/poll.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> #include <linux/net_tstamp.h> +#include <net/smc.h> /* * This structure really needs to be cleaned up. @@ -218,7 +220,7 @@ struct sock_common { u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; - atomic_t skc_refcnt; + refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { @@ -235,13 +237,16 @@ struct sock_common { * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer + * @sk_kern_sock: True if sock is using kernel lock classes * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache + * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed + * @sk_tsq_flags: TCP Small Queues flags * @sk_write_queue: Packet sending queue * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size @@ -250,8 +255,10 @@ struct sock_common { * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @__sk_flags_offset: empty field used to determine location of bitfield * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets @@ -272,6 +279,7 @@ struct sock_common { * @sk_drops: raw/udp drops counter * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() + * @sk_uid: user id of owner * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family @@ -386,12 +394,14 @@ struct sock { /* ===== cache line for TX ===== */ int sk_wmem_queued; - atomic_t sk_wmem_alloc; + refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; + __u32 sk_dst_pending_confirm; + u32 sk_pacing_status; /* see enum sk_pacing */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -426,7 +436,8 @@ struct sock { #endif kmemcheck_bitfield_begin(flags); - unsigned int sk_padding : 2, + unsigned int sk_padding : 1, + sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, sk_userlocks : 4, @@ -469,6 +480,12 @@ struct sock { struct rcu_head sk_rcu; }; +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) @@ -543,8 +560,7 @@ static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) static inline struct sock *sk_next(const struct sock *sk) { - return sk->sk_node.next ? - hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; + return hlist_entry_safe(sk->sk_node.next, struct sock, sk_node); } static inline struct sock *sk_nulls_next(const struct sock *sk) @@ -599,7 +615,7 @@ static inline bool __sk_del_node_init(struct sock *sk) static __always_inline void sock_hold(struct sock *sk) { - atomic_inc(&sk->sk_refcnt); + refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt @@ -607,7 +623,7 @@ static __always_inline void sock_hold(struct sock *sk) */ static __always_inline void __sock_put(struct sock *sk) { - atomic_dec(&sk->sk_refcnt); + refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) @@ -616,7 +632,7 @@ static inline bool sk_del_node_init(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -638,7 +654,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -895,7 +911,10 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static inline void sk_incoming_cpu_update(struct sock *sk) { - sk->sk_incoming_cpu = raw_smp_processor_id(); + int cpu = raw_smp_processor_id(); + + if (unlikely(sk->sk_incoming_cpu != cpu)) + sk->sk_incoming_cpu = cpu; } static inline void sock_rps_record_flow_hash(__u32 hash) @@ -986,10 +1005,11 @@ struct request_sock_ops; struct timewait_sock_ops; struct inet_hashinfo; struct raw_hashinfo; +struct smc_hashinfo; struct module; /* - * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes * un-modified. Special care is taken when initializing object to zero. */ static inline void sk_prot_clear_nulls(struct sock *sk, int size) @@ -1011,7 +1031,8 @@ struct proto { int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept)(struct sock *sk, int flags, int *err); + struct sock * (*accept)(struct sock *sk, int flags, int *err, + bool kern); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -1024,6 +1045,7 @@ struct proto { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); + void (*keepalive)(struct sock *sk, int valbool); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, @@ -1065,6 +1087,7 @@ struct proto { bool (*stream_memory_free)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); + void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1073,7 +1096,7 @@ struct proto { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ - int *memory_pressure; + unsigned long *memory_pressure; long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; @@ -1093,6 +1116,7 @@ struct proto { struct inet_hashinfo *hashinfo; struct udp_table *udp_table; struct raw_hashinfo *raw_hash; + struct smc_hashinfo *smc_hash; } h; struct module *owner; @@ -1104,7 +1128,7 @@ struct proto { atomic_t socks; #endif int (*diag_destroy)(struct sock *sk, int err); -}; +} __randomize_layout; int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); @@ -1124,9 +1148,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) static inline void sk_refcnt_debug_release(const struct sock *sk) { - if (atomic_read(&sk->sk_refcnt) != 1) + if (refcount_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); + sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); } #else /* SOCK_REFCNT_DEBUG */ #define sk_refcnt_debug_inc(sk) do { } while (0) @@ -1177,25 +1201,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return !!*sk->sk_prot->memory_pressure; } -static inline void sk_leave_memory_pressure(struct sock *sk) -{ - int *memory_pressure = sk->sk_prot->memory_pressure; - - if (!memory_pressure) - return; - - if (*memory_pressure) - *memory_pressure = 0; -} - -static inline void sk_enter_memory_pressure(struct sock *sk) -{ - if (!sk->sk_prot->enter_memory_pressure) - return; - - sk->sk_prot->enter_memory_pressure(sk); -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1520,6 +1525,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); @@ -1531,7 +1537,7 @@ void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); #else -#define sock_edemux(skb) sock_efree(skb) +#define sock_edemux sock_efree #endif int sock_setsockopt(struct socket *sock, int level, int op, @@ -1566,7 +1572,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); -int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); unsigned int sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); @@ -1634,7 +1640,7 @@ void sock_init_data(struct socket *sock, struct sock *sk); /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { - if (atomic_dec_and_test(&sk->sk_refcnt)) + if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets @@ -1694,6 +1700,7 @@ static inline void sock_orphan(struct sock *sk) static inline void sock_graft(struct sock *sk, struct socket *parent) { + WARN_ON(parent->sk); write_lock_bh(&sk->sk_callback_lock); sk->sk_wq = parent->wq; parent->sk = sk; @@ -1761,6 +1768,7 @@ static inline void dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; } } } @@ -1771,11 +1779,9 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - /* - * This can be called while sk is owned by the caller only, - * with no state that can be checked in a rcu_dereference_check() cond - */ - old_dst = rcu_dereference_raw(sk->sk_dst_cache); + sk->sk_dst_pending_confirm = 0; + old_dst = rcu_dereference_protected(sk->sk_dst_cache, + lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -1786,6 +1792,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -1806,6 +1813,26 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +static inline void sk_dst_confirm(struct sock *sk) +{ + if (!sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 1; +} + +static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) +{ + if (skb_get_dst_pending_confirm(skb)) { + struct sock *sk = skb->sk; + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (n->confirmed != now) + n->confirmed = now; + if (sk && sk->sk_dst_pending_confirm) + sk->sk_dst_pending_confirm = 0; + } +} + bool sk_mc_loop(struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) @@ -1889,7 +1916,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - 1; } /** @@ -1923,11 +1950,10 @@ static inline bool sk_has_allocations(const struct sock *sk) * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * - * Consider following tcp code paths: + * Consider following tcp code paths:: * - * CPU1 CPU2 - * - * sys_select receive packet + * CPU1 CPU2 + * sys_select receive packet * ... ... * __add_wait_queue update tp->rcv_nxt * ... ... @@ -2005,8 +2031,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); -int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags, +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, + struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -2033,7 +2059,7 @@ static inline unsigned long sock_wspace(struct sock *sk) int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } @@ -2114,7 +2140,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } static inline gfp_t gfp_any(void) @@ -2209,6 +2235,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); +#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { @@ -2219,8 +2246,10 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); - else + else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sk->sk_stamp = skb->tstamp; + else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) + sk->sk_stamp = 0; } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); @@ -2231,7 +2260,7 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * - * Note : callers should take care of initial *tx_flags value (usually 0) + * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, __u8 *tx_flags) @@ -2332,6 +2361,8 @@ bool sk_ns_capable(const struct sock *sk, bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); +void sk_get_meminfo(const struct sock *sk, u32 *meminfo); + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index eba80c4fc56f..8ae9e3b6392e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,8 +46,11 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, }; struct switchdev_attr { @@ -60,8 +63,11 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ + bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ + bool mc_disabled; /* MC_DISABLED */ } u; }; @@ -149,8 +155,11 @@ struct switchdev_ops { }; enum switchdev_notifier_type { - SWITCHDEV_FDB_ADD = 1, - SWITCHDEV_FDB_DEL, + SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, + SWITCHDEV_FDB_DEL_TO_BRIDGE, + SWITCHDEV_FDB_ADD_TO_DEVICE, + SWITCHDEV_FDB_DEL_TO_DEVICE, + SWITCHDEV_FDB_OFFLOADED, }; struct switchdev_notifier_info { @@ -208,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b); + +#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else static inline void switchdev_deferred_process(void) @@ -313,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a, return false; } +#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index f31fb6331a53..3248beaf16b0 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <net/act_api.h> +#include <linux/tc_act/tc_csum.h> struct tcf_csum { struct tc_action common; @@ -11,4 +12,18 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) +static inline bool is_tcf_csum(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_CSUM) + return true; +#endif + return false; +} + +static inline u32 tcf_csum_update_flags(const struct tc_action *a) +{ + return to_tcf_csum(a)->update_flags; +} + #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index b6f173910226..d576374c4d6f 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,7 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool is_tcf_gact_shot(const struct tc_action *a) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,10 +24,21 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) return false; gact = to_gact(a); - if (gact->tcf_action == TC_ACT_SHOT) + if (gact->tcf_action == act) return true; #endif return false; } + +static inline bool is_tcf_gact_shot(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_SHOT); +} + +static inline bool is_tcf_gact_trap(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_TRAP); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 9fd2bea0a6e0..30ba459ddd34 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -6,7 +6,6 @@ #include <linux/rtnetlink.h> #include <linux/module.h> -#define IFE_METAHDRLEN 2 struct tcf_ife_info { struct tc_action common; u8 eth_dst[ETH_ALEN]; @@ -45,8 +44,6 @@ struct tcf_meta_ops { int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); -int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, - const void *dval); int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 29e38d6823df..a46c3f2ace70 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -2,13 +2,64 @@ #define __NET_TC_PED_H #include <net/act_api.h> +#include <linux/tc_act/tc_pedit.h> + +struct tcf_pedit_key_ex { + enum pedit_header_type htype; + enum pedit_cmd cmd; +}; struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct tc_pedit_key *tcfp_keys; + struct tcf_pedit_key_ex *tcfp_keys_ex; }; #define to_pedit(a) ((struct tcf_pedit *)a) +static inline bool is_tcf_pedit(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_PEDIT) + return true; +#endif + return false; +} + +static inline int tcf_pedit_nkeys(const struct tc_action *a) +{ + return to_pedit(a)->tcfp_nkeys; +} + +static inline u32 tcf_pedit_htype(const struct tc_action *a, int index) +{ + if (to_pedit(a)->tcfp_keys_ex) + return to_pedit(a)->tcfp_keys_ex[index].htype; + + return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; +} + +static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index) +{ + if (to_pedit(a)->tcfp_keys_ex) + return to_pedit(a)->tcfp_keys_ex[index].cmd; + + return __PEDIT_CMD_MAX; +} + +static inline u32 tcf_pedit_mask(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].mask; +} + +static inline u32 tcf_pedit_val(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].val; +} + +static inline u32 tcf_pedit_offset(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].off; +} #endif /* __NET_TC_PED_H */ diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h new file mode 100644 index 000000000000..89e9305be880 --- /dev/null +++ b/include/net/tc_act/tc_sample.h @@ -0,0 +1,50 @@ +#ifndef __NET_TC_SAMPLE_H +#define __NET_TC_SAMPLE_H + +#include <net/act_api.h> +#include <linux/tc_act/tc_sample.h> +#include <net/psample.h> + +struct tcf_sample { + struct tc_action common; + u32 rate; + bool truncate; + u32 trunc_size; + struct psample_group __rcu *psample_group; + u32 psample_group_num; + struct list_head tcfm_list; + struct rcu_head rcu; +}; +#define to_sample(a) ((struct tcf_sample *)a) + +static inline bool is_tcf_sample(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + return a->ops && a->ops->type == TCA_ACT_SAMPLE; +#else + return false; +#endif +} + +static inline __u32 tcf_sample_rate(const struct tc_action *a) +{ + return to_sample(a)->rate; +} + +static inline bool tcf_sample_truncate(const struct tc_action *a) +{ + return to_sample(a)->truncate; +} + +static inline int tcf_sample_trunc_size(const struct tc_action *a) +{ + return to_sample(a)->trunc_size; +} + +static inline struct psample_group * +tcf_sample_psample_group(const struct tc_action *a) +{ + return rcu_dereference(to_sample(a)->psample_group); +} + +#endif /* __NET_TC_SAMPLE_H */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 48cca321ee6c..c2090df944ff 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,9 +13,6 @@ #include <net/act_api.h> #include <linux/tc_act/tc_vlan.h> -#define VLAN_F_POP 0x1 -#define VLAN_F_PUSH 0x2 - struct tcf_vlan { struct tc_action common; int tcfv_action; @@ -49,4 +46,9 @@ static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) return to_vlan(a)->tcfv_push_proto; } +static inline u8 tcf_vlan_push_prio(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_prio; +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6061963cca98..70483296157f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,6 +46,10 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/bpf-cgroup.h> + extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; @@ -78,6 +82,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* Maximal number of ACKs sent quickly to accelerate slow-start. */ #define TCP_MAX_QUICKACKS 16U +/* Maximal number of window scale according to RFC1323 */ +#define TCP_MAX_WSCALE 14U + /* urg_data states */ #define TCP_URG_VALID 0x0100 #define TCP_URG_NOTYET 0x0200 @@ -143,6 +150,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ +#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ @@ -231,12 +239,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; -extern int sysctl_tcp_sack; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -262,6 +266,9 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; +extern int sysctl_tcp_recovery; +#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ + extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern int sysctl_tcp_min_tso_segs; @@ -273,7 +280,7 @@ extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; -extern int tcp_memory_pressure; +extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) @@ -347,6 +354,8 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -398,15 +407,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps); -bool tcp_remember_stamp(struct sock *sk); -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); @@ -424,7 +430,7 @@ void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -void tcp_parse_options(const struct sk_buff *skb, +void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -467,7 +473,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); @@ -516,8 +522,9 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -__u32 cookie_init_timestamp(struct request_sock *req); -bool cookie_timestamp_decode(struct tcp_options_received *opt); +u64 cookie_init_timestamp(struct request_sock *req); +bool cookie_timestamp_decode(const struct net *net, + struct tcp_options_received *opt); bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); @@ -542,6 +549,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); +void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); @@ -560,7 +568,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); /* tcp_input.c */ -void tcp_resume_early_retransmit(struct sock *sk); void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); @@ -571,6 +578,7 @@ void tcp_fin(struct sock *sk); void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { + hrtimer_cancel(&tcp_sk(sk)->pacing_timer); inet_csk_clear_xmit_timers(sk); } @@ -696,17 +704,61 @@ u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); -/* TCP timestamps are only 32-bits, this causes a slight - * complication on 64-bit systems since we store a snapshot - * of jiffies in the buffer control blocks below. We decided - * to use only the low 32-bits of jiffies and hide the ugly - * casts with the following macro. +/* TCP uses 32bit jiffies to save some space. + * Note that this is different from tcp_time_stamp, which + * historically has been the same until linux-4.13. + */ +#define tcp_jiffies32 ((u32)jiffies) + +/* + * Deliver a 32bit value for TCP timestamp option (RFC 7323) + * It is no longer tied to jiffies, but to 1 ms clock. + * Note: double check if you want to use tcp_jiffies32 instead of this. */ -#define tcp_time_stamp ((__u32)(jiffies)) +#define TCP_TS_HZ 1000 + +static inline u64 tcp_clock_ns(void) +{ + return local_clock(); +} + +static inline u64 tcp_clock_us(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_USEC); +} + +/* This should only be used in contexts where tp->tcp_mstamp is up to date */ +static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +{ + return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); +} + +/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ +static inline u32 tcp_time_stamp_raw(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); +} + + +/* Refresh 1us clock of a TCP socket, + * ensuring monotically increasing values. + */ +static inline void tcp_mstamp_refresh(struct tcp_sock *tp) +{ + u64 val = tcp_clock_us(); + + if (val > tp->tcp_mstamp) + tp->tcp_mstamp = val; +} + +static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) +{ + return max_t(s64, t1 - t0, 0); +} static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return skb->skb_mstamp.stamp_jiffies; + return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); } @@ -771,9 +823,9 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - struct skb_mstamp first_tx_mstamp; + u64 first_tx_mstamp; /* when we reached the "delivered" count */ - struct skb_mstamp delivered_mstamp; + u64 delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -889,7 +941,7 @@ struct ack_sample { * A sample is invalid if "delivered" or "interval_us" is negative. */ struct rate_sample { - struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -921,7 +973,7 @@ struct tcp_congestion_ops { void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* call when ack arrives (optional) */ void (*in_ack_event)(struct sock *sk, u32 flags); - /* new value of cwnd after loss (optional) */ + /* new value of cwnd after loss (required) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); @@ -952,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); @@ -1001,7 +1055,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs); + struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK @@ -1032,23 +1086,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; } -/* TCP early-retransmit (ER) is similar to but more conservative than - * the thin-dupack feature. Enable ER only if thin-dupack is disabled. - */ -static inline void tcp_enable_early_retrans(struct tcp_sock *tp) -{ - struct net *net = sock_net((struct sock *)tp); - - tp->do_early_retrans = sysctl_tcp_early_retrans && - sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - net->ipv4.sysctl_tcp_reordering == 3; -} - -static inline void tcp_disable_early_retrans(struct tcp_sock *tp) -{ - tp->do_early_retrans = 0; -} - static inline unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; @@ -1248,12 +1285,14 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta); static inline void tcp_slow_start_after_idle_check(struct sock *sk) { + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) + if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + ca_ops->cong_control) return; - delta = tcp_time_stamp - tp->lsndtime; + delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) tcp_cwnd_restart(sk, delta); } @@ -1265,9 +1304,11 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, static inline int tcp_win_from_space(int space) { - return sysctl_tcp_adv_win_scale<=0 ? - (space>>(-sysctl_tcp_adv_win_scale)) : - space - (space>>sysctl_tcp_adv_win_scale); + int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + + return tcp_adv_win_scale <= 0 ? + (space>>(-tcp_adv_win_scale)) : + space - (space>>tcp_adv_win_scale); } /* Note: caller must be prepared to deal with negative returns */ @@ -1287,6 +1328,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req, const struct dst_entry *dst); void tcp_enter_memory_pressure(struct sock *sk); +void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { @@ -1313,8 +1355,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) { const struct inet_connection_sock *icsk = &tp->inet_conn; - return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, - tcp_time_stamp - tp->rcv_tstamp); + return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, + tcp_jiffies32 - tp->rcv_tstamp); } static inline int tcp_fin_time(const struct sock *sk) @@ -1405,6 +1447,7 @@ struct tcp_md5sig_key { u8 keylen; u8 family; /* AF_INET or AF_INET6 */ union tcp_md5_addr addr; + u8 prefixlen; u8 key[TCP_MD5SIG_MAXKEYLEN]; struct rcu_head rcu; }; @@ -1448,9 +1491,10 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, u8 newkeylen, gfp_t gfp); + int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, + gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family); + int family, u8 prefixlen); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1506,6 +1550,9 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie *foc, struct dst_entry *dst); void tcp_fastopen_init_key_once(bool publish); +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie); +bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ @@ -1515,6 +1562,12 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; +void tcp_fastopen_active_disable(struct sock *sk); +bool tcp_fastopen_active_should_disable(struct sock *sk); +void tcp_fastopen_active_disable_ofo_check(struct sock *sk); +void tcp_fastopen_active_timeout_reset(void); + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ @@ -1801,6 +1854,7 @@ struct tcp_sock_af_ops { const struct sock *sk, const struct sk_buff *skb); int (*md5_parse)(struct sock *sk, + int optname, char __user *optval, int optlen); #endif @@ -1824,9 +1878,9 @@ struct tcp_request_sock_ops { __u16 *mss); #endif struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); + const struct request_sock *req); + u32 (*init_seq)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -1857,17 +1911,10 @@ void tcp_v4_init(void); void tcp_init(void); /* tcp_recovery.c */ - -/* Flags to enable various loss recovery features. See below */ -extern int sysctl_tcp_recovery; - -/* Use TCP RACK to detect (some) tail and retransmit losses */ -#define TCP_RACK_LOST_RETRANS 0x1 - -extern int tcp_rack_mark_lost(struct sock *sk); - -extern void tcp_rack_advance(struct tcp_sock *tp, - const struct skb_mstamp *xmit_time, u8 sacked); +extern void tcp_rack_mark_lost(struct sock *sk); +extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, + u64 xmit_time); +extern void tcp_rack_reo_timeout(struct sock *sk); /* * Save and compile IPv4 options, return a pointer to it @@ -1953,4 +2000,89 @@ static inline void tcp_listendrop(const struct sock *sk) __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); + +/* + * Interface for adding Upper Level Protocols over TCP + */ + +#define TCP_ULP_NAME_MAX 16 +#define TCP_ULP_MAX 128 +#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) + +struct tcp_ulp_ops { + struct list_head list; + + /* initialize ulp */ + int (*init)(struct sock *sk); + /* cleanup ulp */ + void (*release)(struct sock *sk); + + char name[TCP_ULP_NAME_MAX]; + struct module *owner; +}; +int tcp_register_ulp(struct tcp_ulp_ops *type); +void tcp_unregister_ulp(struct tcp_ulp_ops *type); +int tcp_set_ulp(struct sock *sk, const char *name); +void tcp_get_available_ulp(char *buf, size_t len); +void tcp_cleanup_ulp(struct sock *sk); + +/* Call BPF_SOCK_OPS program that returns an int. If the return value + * is < 0, then the BPF op failed (for example if the loaded BPF + * program does not support the chosen operation or there is no BPF + * program loaded). + */ +#ifdef CONFIG_BPF +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + struct bpf_sock_ops_kern sock_ops; + int ret; + + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + + memset(&sock_ops, 0, sizeof(sock_ops)); + sock_ops.sk = sk; + sock_ops.op = op; + + ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); + if (ret == 0) + ret = sock_ops.reply; + else + ret = -1; + return ret; +} +#else +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + return -EPERM; +} +#endif + +static inline u32 tcp_timeout_init(struct sock *sk) +{ + int timeout; + + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; + return timeout; +} + +static inline u32 tcp_rwnd_init_bpf(struct sock *sk) +{ + int rwnd; + + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + + if (rwnd < 0) + rwnd = 0; + return rwnd; +} + +static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) +{ + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); +} #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h new file mode 100644 index 000000000000..b89d397dd62f --- /dev/null +++ b/include/net/tls.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <[email protected]>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _TLS_OFFLOAD_H +#define _TLS_OFFLOAD_H + +#include <linux/types.h> + +#include <uapi/linux/tls.h> + + +/* Maximum data size carried in a TLS record */ +#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) + +#define TLS_HEADER_SIZE 5 +#define TLS_NONCE_OFFSET TLS_HEADER_SIZE + +#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) + +#define TLS_RECORD_TYPE_DATA 0x17 + +#define TLS_AAD_SPACE_SIZE 13 + +struct tls_sw_context { + struct crypto_aead *aead_send; + + /* Sending context */ + char aad_space[TLS_AAD_SPACE_SIZE]; + + unsigned int sg_plaintext_size; + int sg_plaintext_num_elem; + struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; + + unsigned int sg_encrypted_size; + int sg_encrypted_num_elem; + struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; + + /* AAD | sg_plaintext_data | sg_tag */ + struct scatterlist sg_aead_in[2]; + /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ + struct scatterlist sg_aead_out[2]; +}; + +enum { + TLS_PENDING_CLOSED_RECORD +}; + +struct tls_context { + union { + struct tls_crypto_info crypto_send; + struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; + }; + + void *priv_ctx; + + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + char *iv; + u16 rec_seq_size; + char *rec_seq; + + struct scatterlist *partially_sent_record; + u16 partially_sent_offset; + unsigned long flags; + + u16 pending_open_record_frags; + int (*push_pending_record)(struct sock *sk, int flags); + void (*free_resources)(struct sock *sk); + + void (*sk_write_space)(struct sock *sk); + void (*sk_proto_close)(struct sock *sk, long timeout); + + int (*setsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + unsigned int optlen); + int (*getsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + int __user *optlen); +}; + +int wait_on_pending_writer(struct sock *sk, long *timeo); +int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen); +int tls_sk_attach(struct sock *sk, int optname, char __user *optval, + unsigned int optlen); + + +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx); +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +void tls_sw_close(struct sock *sk, long timeout); + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); +void tls_icsk_clean_acked(struct sock *sk); + +int tls_push_sg(struct sock *sk, struct tls_context *ctx, + struct scatterlist *sg, u16 first_offset, + int flags); +int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo); + +static inline bool tls_is_pending_closed_record(struct tls_context *ctx) +{ + return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); +} + +static inline int tls_complete_pending_work(struct sock *sk, + struct tls_context *ctx, + int flags, long *timeo) +{ + int rc = 0; + + if (unlikely(sk->sk_write_pending)) + rc = wait_on_pending_writer(sk, timeo); + + if (!rc && tls_is_pending_closed_record(ctx)) + rc = tls_push_pending_closed_record(sk, ctx, flags, timeo); + + return rc; +} + +static inline bool tls_is_partially_sent_record(struct tls_context *ctx) +{ + return !!ctx->partially_sent_record; +} + +static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) +{ + return tls_ctx->pending_open_record_frags; +} + +static inline void tls_err_abort(struct sock *sk) +{ + sk->sk_err = -EBADMSG; + sk->sk_error_report(sk); +} + +static inline bool tls_bigint_increment(unsigned char *seq, int len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + ++seq[i]; + if (seq[i] != 0) + break; + } + + return (i == -1); +} + +static inline void tls_advance_record_sn(struct sock *sk, + struct tls_context *ctx) +{ + if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) + tls_err_abort(sk); + tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + ctx->iv_size); +} + +static inline void tls_fill_prepend(struct tls_context *ctx, + char *buf, + size_t plaintext_len, + unsigned char record_type) +{ + size_t pkt_len, iv_size = ctx->iv_size; + + pkt_len = plaintext_len + iv_size + ctx->tag_size; + + /* we cover nonce explicit here as well, so buf should be of + * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE + */ + buf[0] = record_type; + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + /* we can use IV for nonce explicit according to spec */ + buf[3] = pkt_len >> 8; + buf[4] = pkt_len & 0xFF; + memcpy(buf + TLS_NONCE_OFFSET, + ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); +} + +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ulp_data; +} + +static inline struct tls_sw_context *tls_sw_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_sw_context *)tls_ctx->priv_ctx; +} + +static inline struct tls_offload_context *tls_offload_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_offload_context *)tls_ctx->priv_ctx; +} + +int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type); + +#endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 1661791e8ca1..972ce4baab6b 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -204,7 +204,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock *, const struct sock *, bool), unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); @@ -250,13 +249,8 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -static inline struct sk_buff * -__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, - int *off, int *err) -{ - return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - udp_skb_destructor, peeked, off, err); -} +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *peeked, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { @@ -308,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); +/* UDP uses skb->dev_scratch to cache as much information as possible and avoid + * possibly multiple cache miss on dequeue() + */ +#if BITS_PER_LONG == 64 + +/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on + * cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been already + * validated and pulled. + */ +struct udp_dev_scratch { + u32 truesize; + u16 len; + bool is_linear; + bool csum_unnecessary; +}; + +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; +} + +#else +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return skb->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return skb_csum_unnecessary(skb); +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return !skb_is_nonlinear(skb); +} +#endif + +static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, + struct iov_iter *to) +{ + int n, copy = len - off; + + n = copy_to_iter(skb->data + off, copy, to); + if (n == copy) + return 0; + + return -EFAULT; +} + /* * SNMP statistics for UDP and UDP-Lite */ @@ -373,4 +428,5 @@ void udp_encap_enable(void); #if IS_ENABLED(CONFIG_IPV6) void udpv6_encap_enable(void); #endif + #endif /* _UDP_H */ diff --git a/include/net/udplite.h b/include/net/udplite.h index ea340524f99b..b7a18f63d86d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -26,8 +26,8 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { + udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; - sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 49a59202f85e..3f430e38ab82 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -183,7 +183,7 @@ struct vxlan_sock { struct hlist_node hlist; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; - atomic_t refcnt; + refcount_t refcnt; u32 flags; }; @@ -221,9 +221,17 @@ struct vxlan_config { bool no_share; }; +struct vxlan_dev_node { + struct hlist_node hlist; + struct vxlan_dev *vxlan; +}; + /* Pseudo network device */ struct vxlan_dev { - struct hlist_node hlist; /* vni hash table */ + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) @@ -232,7 +240,6 @@ struct vxlan_dev { struct net_device *dev; struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ - u32 flags; /* VXLAN_F_* in vxlan.h */ struct timer_list age_timer; spinlock_t hash_lock; @@ -259,6 +266,7 @@ struct vxlan_dev { #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 +#define VXLAN_F_IPV6_LINKLOCAL 0x8000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -273,6 +281,7 @@ struct vxlan_dev { /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ VXLAN_F_IPV6 | \ + VXLAN_F_IPV6_LINKLOCAL | \ VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ diff --git a/include/net/wext.h b/include/net/wext.h index 345911965dbb..454ff763eeba 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -6,7 +6,7 @@ struct net; #ifdef CONFIG_WEXT_CORE -int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, void __user *arg); int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); @@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, struct iw_statistics *get_wireless_stats(struct net_device *dev); int call_commit_handler(struct net_device *dev); #else -static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, +static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, void __user *arg) { return -EINVAL; diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0..2609b57bd459 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -11,6 +11,7 @@ #define _X25_H #include <linux/x25.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #define X25_ADDR_LEN 16 @@ -129,7 +130,7 @@ struct x25_route { struct x25_address address; unsigned int sigdigits; struct net_device *dev; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_neigh { @@ -141,7 +142,7 @@ struct x25_neigh { unsigned long t20; struct timer_list t20timer; unsigned long global_facil_mask; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_sock { @@ -242,12 +243,12 @@ void x25_link_free(void); /* x25_neigh.c */ static __inline__ void x25_neigh_hold(struct x25_neigh *nb) { - atomic_inc(&nb->refcnt); + refcount_inc(&nb->refcnt); } static __inline__ void x25_neigh_put(struct x25_neigh *nb) { - if (atomic_dec_and_test(&nb->refcnt)) + if (refcount_dec_and_test(&nb->refcnt)) kfree(nb); } @@ -265,12 +266,12 @@ void x25_route_free(void); static __inline__ void x25_route_hold(struct x25_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void x25_route_put(struct x25_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } @@ -298,10 +299,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 31947b9c21d6..c0916ab18d32 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -13,6 +13,7 @@ #include <linux/mutex.h> #include <linux/audit.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #include <net/dst.h> @@ -120,6 +121,13 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; +struct xfrm_state_offload { + struct net_device *dev; + unsigned long offload_handle; + unsigned int num_exthdrs; + u8 flags; +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -130,7 +138,7 @@ struct xfrm_state { struct hlist_node bysrc; struct hlist_node byspi; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; struct xfrm_id id; @@ -207,12 +215,16 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + struct xfrm_state_offload xso; + /* used to fix curlft->add_time when changing date */ long saved_tmo; /* Last used time */ unsigned long lastused; + struct page_frag xfrag; + /* Reference to data common to all the instances of this * transformer. */ const struct xfrm_type *type; @@ -220,6 +232,8 @@ struct xfrm_state { struct xfrm_mode *inner_mode_iaf; struct xfrm_mode *outer_mode; + const struct xfrm_type_offload *type_offload; + /* Security context */ struct xfrm_sec_ctx *security; @@ -278,9 +292,7 @@ struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { - unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, @@ -301,8 +313,8 @@ struct xfrm_policy_afinfo { struct dst_entry *(*blackhole_route)(struct net *net, struct dst_entry *orig); }; -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family); +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); void km_state_notify(struct xfrm_state *x, const struct km_event *c); @@ -314,12 +326,14 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 portid); int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned int family; - unsigned int proto; - __be16 eth_proto; - struct module *owner; - const struct xfrm_type *type_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; + unsigned int family; + unsigned int proto; + __be16 eth_proto; + struct module *owner; + const struct xfrm_type *type_map[IPPROTO_MAX]; + const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; + struct xfrm_mode *mode_map[XFRM_MODE_MAX]; + int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_selector *sel, const struct flowi *fl); @@ -343,17 +357,16 @@ struct xfrm_state_afinfo { int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); +struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { unsigned int family; - struct module *owner; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo); +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -381,6 +394,18 @@ struct xfrm_type { int xfrm_register_type(const struct xfrm_type *type, unsigned short family); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); +struct xfrm_type_offload { + char *description; + struct module *owner; + u8 proto; + void (*encap)(struct xfrm_state *, struct sk_buff *pskb); + int (*input_tail)(struct xfrm_state *x, struct sk_buff *skb); + int (*xmit)(struct xfrm_state *, struct sk_buff *pskb, netdev_features_t features); +}; + +int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); +int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); + struct xfrm_mode { /* * Remove encapsulation header. @@ -429,6 +454,16 @@ struct xfrm_mode { */ int (*output)(struct xfrm_state *x, struct sk_buff *skb); + /* + * Adjust pointers into the packet and do GSO segmentation. + */ + struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); + + /* + * Adjust pointers into the packet when IPsec is done at layer2. + */ + void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); + struct xfrm_state_afinfo *afinfo; struct module *owner; unsigned int encap; @@ -499,6 +534,7 @@ struct xfrm_tmpl { }; #define XFRM_MAX_DEPTH 6 +#define XFRM_MAX_OFFLOAD_DEPTH 1 struct xfrm_policy_walk_entry { struct list_head all; @@ -524,7 +560,7 @@ struct xfrm_policy { /* This lock only affects elements except for entry. */ rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct timer_list timer; struct flow_cache_object flo; @@ -586,7 +622,6 @@ struct xfrm_migrate { struct xfrm_mgr { struct list_head list; - char *id; int (*notify)(struct xfrm_state *x, const struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp); struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); @@ -597,7 +632,8 @@ struct xfrm_mgr { u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); bool (*is_alive)(const struct km_event *c); }; @@ -682,6 +718,7 @@ struct xfrm_spi_skb_cb { unsigned int daddroff; unsigned int family; + __be32 seq; }; #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) @@ -779,14 +816,14 @@ static inline void xfrm_audit_state_icvfail(struct xfrm_state *x, static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) - atomic_inc(&policy->refcnt); + refcount_inc(&policy->refcnt); } void xfrm_policy_destroy(struct xfrm_policy *policy); static inline void xfrm_pol_put(struct xfrm_policy *policy) { - if (atomic_dec_and_test(&policy->refcnt)) + if (refcount_dec_and_test(&policy->refcnt)) xfrm_policy_destroy(policy); } @@ -801,27 +838,27 @@ void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { - atomic_dec(&x->refcnt); + refcount_dec(&x->refcnt); } static inline void xfrm_state_put(struct xfrm_state *x) { - if (atomic_dec_and_test(&x->refcnt)) + if (refcount_dec_and_test(&x->refcnt)) __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) { - atomic_inc(&x->refcnt); + refcount_inc(&x->refcnt); } static inline bool addr_match(const void *token1, const void *token2, - int prefixlen) + unsigned int prefixlen) { const __be32 *a1 = token1; const __be32 *a2 = token2; - int pdw; - int pbi; + unsigned int pdw; + unsigned int pbi; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ @@ -845,9 +882,9 @@ static inline bool addr_match(const void *token1, const void *token2, static inline bool addr4_match(__be32 a1, __be32 a2, u8 prefixlen) { /* C99 6.5.7 (3): u32 << 32 is undefined behaviour */ - if (prefixlen == 0) + if (sizeof(long) == 4 && prefixlen == 0) return true; - return !((a1 ^ a2) & htonl(0xFFFFFFFFu << (32 - prefixlen))); + return !((a1 ^ a2) & htonl(~0UL << (32 - prefixlen))); } static __inline__ @@ -944,10 +981,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -963,21 +996,46 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); +struct xfrm_offload { + /* Output sequence number for replay protection on offloading. */ + struct { + __u32 low; + __u32 hi; + } seq; + + __u32 flags; +#define SA_DELETE_REQ 1 +#define CRYPTO_DONE 2 +#define CRYPTO_NEXT_DONE 4 +#define CRYPTO_FALLBACK 8 +#define XFRM_GSO_SEGMENT 16 +#define XFRM_GRO 32 + + __u32 status; +#define CRYPTO_SUCCESS 1 +#define CRYPTO_GENERIC_ERROR 2 +#define CRYPTO_TRANSPORT_AH_AUTH_FAILED 4 +#define CRYPTO_TRANSPORT_ESP_AUTH_FAILED 8 +#define CRYPTO_TUNNEL_AH_AUTH_FAILED 16 +#define CRYPTO_TUNNEL_ESP_AUTH_FAILED 32 +#define CRYPTO_INVALID_PACKET_SYNTAX 64 +#define CRYPTO_INVALID_PROTOCOL 128 + + __u8 proto; +}; + struct sec_path { - atomic_t refcnt; + refcount_t refcnt; int len; + int olen; + struct xfrm_state *xvec[XFRM_MAX_DEPTH]; + struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; static inline int secpath_exists(struct sk_buff *skb) @@ -993,7 +1051,7 @@ static inline struct sec_path * secpath_get(struct sec_path *sp) { if (sp) - atomic_inc(&sp->refcnt); + refcount_inc(&sp->refcnt); return sp; } @@ -1002,11 +1060,12 @@ void __secpath_destroy(struct sec_path *sp); static inline void secpath_put(struct sec_path *sp) { - if (sp && atomic_dec_and_test(&sp->refcnt)) + if (sp && refcount_dec_and_test(&sp->refcnt)) __secpath_destroy(sp); } struct sec_path *secpath_dup(struct sec_path *src); +int secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) @@ -1168,6 +1227,7 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } void xfrm_garbage_collect(struct net *net); +void xfrm_garbage_collect_deferred(struct net *net); #else @@ -1499,6 +1559,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1519,6 +1580,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb); +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { @@ -1580,6 +1642,11 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family); + struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); @@ -1610,13 +1677,16 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m); + struct xfrm_migrate *m, + struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k, struct net *net); + struct xfrm_kmaddress *k, struct net *net, + struct xfrm_encap_tmpl *encap); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); @@ -1774,6 +1844,67 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) +{ + struct sec_path *sp = skb->sp; + + if (!sp || !sp->olen || sp->len != sp->olen) + return NULL; + + return &sp->ovec[sp->olen - 1]; +} +#endif + +void __net_init xfrm_dev_init(void); + +#ifdef CONFIG_XFRM_OFFLOAD +int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo); +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev) + xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + struct net_device *dev = xso->dev; + + if (dev && dev->xfrmdev_ops) { + dev->xfrmdev_ops->xdo_dev_state_free(x); + xso->dev = NULL; + dev_put(dev); + } +} +#else +static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +{ + return 0; +} + +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +{ + return 0; +} + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ +} + +static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + return false; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) |