diff options
Diffstat (limited to 'include/net')
68 files changed, 1459 insertions, 499 deletions
diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index dc03d77ad23b..a2f59ec98d24 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -197,6 +197,27 @@  #define LOWPAN_NHC_UDP_CS_P_11	0xF3 /* source & dest = 0xF0B + 4bit inline */  #define LOWPAN_NHC_UDP_CS_C	0x04 /* checksum elided */ +#define LOWPAN_PRIV_SIZE(llpriv_size)	\ +	(sizeof(struct lowpan_priv) + llpriv_size) + +enum lowpan_lltypes { +	LOWPAN_LLTYPE_BTLE, +	LOWPAN_LLTYPE_IEEE802154, +}; + +struct lowpan_priv { +	enum lowpan_lltypes lltype; + +	/* must be last */ +	u8 priv[0] __aligned(sizeof(void *)); +}; + +static inline +struct lowpan_priv *lowpan_priv(const struct net_device *dev) +{ +	return netdev_priv(dev); +} +  #ifdef DEBUG  /* print data in line */  static inline void raw_dump_inline(const char *caller, char *msg, @@ -372,6 +393,8 @@ lowpan_uncompress_size(const struct sk_buff *skb, u16 *dgram_offset)  	return skb->len + uncomp_header - ret;  } +void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype); +  int  lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,  			 const u8 *saddr, const u8 saddr_type, diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..9d446f136607 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -21,6 +21,8 @@ struct tcf_common {  	struct gnet_stats_rate_est64	tcfc_rate_est;  	spinlock_t			tcfc_lock;  	struct rcu_head			tcfc_rcu; +	struct gnet_stats_basic_cpu __percpu *cpu_bstats; +	struct gnet_stats_queue __percpu *cpu_qstats;  };  #define tcf_head	common.tcfc_head  #define tcf_index	common.tcfc_index @@ -68,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf)  	kfree(hf->htab);  } +/* Update lastuse only if needed, to avoid dirtying a cache line. + * We use a temp variable to avoid fetching jiffies twice. 
+ */ +static inline void tcf_lastuse_update(struct tcf_t *tm) +{ +	unsigned long now = jiffies; + +	if (tm->lastuse != now) +		tm->lastuse = now; +} +  #ifdef CONFIG_NET_CLS_ACT  #define ACT_P_CREATED 1 @@ -98,15 +111,20 @@ struct tc_action_ops {  };  int tcf_hash_search(struct tc_action *a, u32 index); -void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind);  u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);  int tcf_hash_check(u32 index, struct tc_action *a, int bind);  int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, -		    int size, int bind); +		    int size, int bind, bool cpustats);  void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);  void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ +	return __tcf_hash_release(a, bind, false); +} +  int tcf_register_action(struct tc_action_ops *a, unsigned int mask);  int tcf_unregister_action(struct tc_action_ops *a);  int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index def59d3a34d5..b5474b1fcd83 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -91,6 +91,37 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);  void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);  void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ +	if (dev->addr_len != ETH_ALEN) +		return -1; +	memcpy(eui, dev->dev_addr, 3); +	memcpy(eui + 5, dev->dev_addr + 3, 3); + +	/* +	 * The zSeries OSA network cards can be shared among various +	 * OS instances, but the OSA cards have only one MAC address. 
+	 * This leads to duplicate address conflicts in conjunction +	 * with IPv6 if more than one instance uses the same card. +	 * +	 * The driver for these cards can deliver a unique 16-bit +	 * identifier for each instance sharing the same card.  It is +	 * placed instead of 0xFFFE in the interface identifier.  The +	 * "u" bit of the interface identifier is not inverted in this +	 * case.  Hence the resulting interface identifier has local +	 * scope according to RFC2373. +	 */ +	if (dev->dev_id) { +		eui[3] = (dev->dev_id >> 8) & 0xFF; +		eui[4] = dev->dev_id & 0xFF; +	} else { +		eui[3] = 0xFF; +		eui[4] = 0xFE; +		eui[0] ^= 2; +	} +	return 0; +} +  static inline unsigned long addrconf_timeout_fixup(u32 timeout,  						   unsigned int unit)  { @@ -158,8 +189,8 @@ struct ipv6_stub {  				 const struct in6_addr *addr);  	int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,  				 const struct in6_addr *addr); -	int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, -				struct flowi6 *fl6); +	int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, +			       struct dst_entry **dst, struct flowi6 *fl6);  	void (*udpv6_encap_enable)(void);  	void (*ndisc_send_na)(struct net_device *dev, struct neighbour *neigh,  			      const struct in6_addr *daddr, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3bd618d3e55d..9e1a59e01fa2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -512,9 +512,11 @@ struct hci_conn_params {  		HCI_AUTO_CONN_DIRECT,  		HCI_AUTO_CONN_ALWAYS,  		HCI_AUTO_CONN_LINK_LOSS, +		HCI_AUTO_CONN_EXPLICIT,  	} auto_connect;  	struct hci_conn *conn; +	bool explicit_connect;  };  extern struct list_head hci_dev_list; @@ -639,6 +641,7 @@ enum {  	HCI_CONN_DROP,  	HCI_CONN_PARAM_REMOVAL_PEND,  	HCI_CONN_NEW_LINK_KEY, +	HCI_CONN_SCANNING,  };  static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) @@ -808,6 +811,26 @@ static inline struct hci_conn 
*hci_conn_hash_lookup_state(struct hci_dev *hdev,  	return NULL;  } +static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) +{ +	struct hci_conn_hash *h = &hdev->conn_hash; +	struct hci_conn  *c; + +	rcu_read_lock(); + +	list_for_each_entry_rcu(c, &h->list, list) { +		if (c->type == LE_LINK && c->state == BT_CONNECT && +		    !test_bit(HCI_CONN_SCANNING, &c->flags)) { +			rcu_read_unlock(); +			return c; +		} +	} + +	rcu_read_unlock(); + +	return NULL; +} +  int hci_disconnect(struct hci_conn *conn, __u8 reason);  bool hci_setup_sync(struct hci_conn *conn, __u16 handle);  void hci_sco_setup(struct hci_conn *conn, __u8 status); @@ -823,6 +846,9 @@ void hci_chan_del(struct hci_chan *chan);  void hci_chan_list_flush(struct hci_conn *conn);  struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); +struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, +				     u8 dst_type, u8 sec_level, +				     u16 conn_timeout, u8 role);  struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,  				u8 dst_type, u8 sec_level, u16 conn_timeout,  				u8 role); @@ -988,6 +1014,9 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev);  struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,  						  bdaddr_t *addr,  						  u8 addr_type); +struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, +						    bdaddr_t *addr, +						    u8 addr_type);  void hci_uuids_clear(struct hci_dev *hdev); @@ -1297,7 +1326,7 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,  	if (max >= to_multiplier * 8)  		return -EINVAL; -	max_latency = (to_multiplier * 8 / max) - 1; +	max_latency = (to_multiplier * 4 / max) - 1;  	if (latency > 499 || latency > max_latency)  		return -EINVAL; diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 2239a3753092..c98afc08cc26 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ 
-55,6 +55,8 @@  #define L2CAP_INFO_TIMEOUT		msecs_to_jiffies(4000)  #define L2CAP_MOVE_TIMEOUT		msecs_to_jiffies(4000)  #define L2CAP_MOVE_ERTX_TIMEOUT		msecs_to_jiffies(60000) +#define L2CAP_WAIT_ACK_POLL_PERIOD	msecs_to_jiffies(200) +#define L2CAP_WAIT_ACK_TIMEOUT		msecs_to_jiffies(10000)  #define L2CAP_A2MP_DEFAULT_MTU		670 diff --git a/include/net/bond_options.h b/include/net/bond_options.h index c28aca25320e..1797235cd590 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -66,6 +66,7 @@ enum {  	BOND_OPT_AD_ACTOR_SYS_PRIO,  	BOND_OPT_AD_ACTOR_SYSTEM,  	BOND_OPT_AD_USER_PORT_KEY, +	BOND_OPT_NUM_PEER_NOTIF_ALIAS,  	BOND_OPT_LAST  }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 20defc0353d1..c1740a2794a3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -310,6 +310,13 @@ static inline bool bond_uses_primary(struct bonding *bond)  	return bond_mode_uses_primary(BOND_MODE(bond));  } +static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ +	struct slave *slave = rcu_dereference(bond->curr_active_slave); + +	return bond_uses_primary(bond) && slave ? slave->dev : NULL; +} +  static inline bool bond_slave_is_up(struct slave *slave)  {  	return netif_running(slave->dev) && netif_carrier_ok(slave->dev); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a741678f24a2..f0889a247643 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2369,8 +2369,7 @@ struct cfg80211_qos_map {   *	method returns 0.)   *   * @mgmt_frame_register: Notify driver that a management frame type was - *	registered. Note that this callback may not sleep, and cannot run - *	concurrently with itself. + *	registered. The callback is allowed to sleep.   *   * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device.   *	Parameters are bitmaps of allowed antennas to use for TX/RX. 
Drivers may @@ -4868,6 +4867,23 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,  			     struct cfg80211_chan_def *chandef,  			     enum nl80211_iftype iftype); +/** + * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation + * @wiphy: the wiphy + * @chandef: the channel definition + * @iftype: interface type + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.). This version + * also checks if IR-relaxation conditions apply, to allow beaconing under + * more permissive conditions. + * + * Requires the RTNL to be held. + */ +bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, +				   struct cfg80211_chan_def *chandef, +				   enum nl80211_iftype iftype); +  /*   * cfg80211_ch_switch_notify - update wdev channel and notify userspace   * @dev: the device which switched channels diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 290a9a69af07..76b1ffaea863 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -34,6 +34,8 @@ struct cfg802154_ops {  							   int type);  	void	(*del_virtual_intf_deprecated)(struct wpan_phy *wpan_phy,  					       struct net_device *dev); +	int	(*suspend)(struct wpan_phy *wpan_phy); +	int	(*resume)(struct wpan_phy *wpan_phy);  	int	(*add_virtual_intf)(struct wpan_phy *wpan_phy,  				    const char *name,  				    unsigned char name_assign_type, @@ -61,6 +63,8 @@ struct cfg802154_ops {  					 s8 max_frame_retries);  	int	(*set_lbt_mode)(struct wpan_phy *wpan_phy,  				struct wpan_dev *wpan_dev, bool mode); +	int	(*set_ackreq_default)(struct wpan_phy *wpan_phy, +				      struct wpan_dev *wpan_dev, bool ackreq);  };  static inline bool @@ -171,6 +175,9 @@ struct wpan_dev {  	struct list_head list;  	struct net_device *netdev; +	/* lowpan interface, set when the wpan_dev belongs to one lowpan_dev */ +	struct net_device *lowpan_dev; +  	u32 identifier;  	/* MAC PIB */ @@ -191,6 
+198,9 @@ struct wpan_dev {  	bool lbt;  	bool promiscuous_mode; + +	/* fallback for acknowledgment bit setting */ +	bool ackreq;  };  #define to_phy(_dev)	container_of(_dev, struct wpan_phy, dev) diff --git a/include/net/checksum.h b/include/net/checksum.h index 2d1d73cb773e..9fcaedf994ee 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -140,14 +140,16 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)  struct sk_buff;  void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, -			      __be32 from, __be32 to, int pseudohdr); +			      __be32 from, __be32 to, bool pseudohdr);  void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,  			       const __be32 *from, const __be32 *to, -			       int pseudohdr); +			       bool pseudohdr); +void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, +				     __wsum diff, bool pseudohdr);  static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,  					    __be16 from, __be16 to, -					    int pseudohdr) +					    bool pseudohdr)  {  	inet_proto_csum_replace4(sum, skb, (__force __be32)from,  				 (__force __be32)to, pseudohdr); diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index c15d39456e14..ccd6d8bffa4d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -49,9 +49,38 @@ static inline void sock_update_classid(struct sock *sk)  	if (classid != sk->sk_classid)  		sk->sk_classid = classid;  } + +static inline u32 task_get_classid(const struct sk_buff *skb) +{ +	u32 classid = task_cls_state(current)->classid; + +	/* Due to the nature of the classifier it is required to ignore all +	 * packets originating from softirq context as accessing `current' +	 * would lead to false results. +	 * +	 * This test assumes that all callers of dev_queue_xmit() explicitly +	 * disable bh. 
Knowing this, it is possible to detect softirq based +	 * calls by looking at the number of nested bh disable calls because +	 * softirqs always disables bh. +	 */ +	if (in_serving_softirq()) { +		/* If there is an sk_classid we'll use that. */ +		if (!skb->sk) +			return 0; + +		classid = skb->sk->sk_classid; +	} + +	return classid; +}  #else /* !CONFIG_CGROUP_NET_CLASSID */  static inline void sock_update_classid(struct sock *sk)  {  } + +static inline u32 task_get_classid(const struct sk_buff *skb) +{ +	return 0; +}  #endif /* CONFIG_CGROUP_NET_CLASSID */  #endif  /* _NET_CLS_CGROUP_H */ diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63ba8f73..b34d812bc5d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -171,6 +171,11 @@ static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)  	return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);  } +static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) +{ +	return !!((ds->dsa_port_mask) & (1 << p)); +} +  static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)  {  	return ds->phys_port_mask & (1 << p) && ds->ports[p]; @@ -296,12 +301,28 @@ struct dsa_switch_driver {  				     u32 br_port_mask);  	int	(*port_stp_update)(struct dsa_switch *ds, int port,  				   u8 state); -	int	(*fdb_add)(struct dsa_switch *ds, int port, -			   const unsigned char *addr, u16 vid); -	int	(*fdb_del)(struct dsa_switch *ds, int port, -			   const unsigned char *addr, u16 vid); -	int	(*fdb_getnext)(struct dsa_switch *ds, int port, -			       unsigned char *addr, bool *is_static); + +	/* +	 * VLAN support +	 */ +	int	(*port_pvid_get)(struct dsa_switch *ds, int port, u16 *pvid); +	int	(*port_pvid_set)(struct dsa_switch *ds, int port, u16 pvid); +	int	(*port_vlan_add)(struct dsa_switch *ds, int port, u16 vid, +				 bool untagged); +	int	(*port_vlan_del)(struct dsa_switch *ds, int port, u16 vid); +	int	(*vlan_getnext)(struct dsa_switch *ds, u16 *vid, +				unsigned long *ports, 
unsigned long *untagged); + +	/* +	 * Forwarding database +	 */ +	int	(*port_fdb_add)(struct dsa_switch *ds, int port, +				const unsigned char *addr, u16 vid); +	int	(*port_fdb_del)(struct dsa_switch *ds, int port, +				const unsigned char *addr, u16 vid); +	int	(*port_fdb_getnext)(struct dsa_switch *ds, int port, +				    unsigned char *addr, u16 *vid, +				    bool *is_static);  };  void register_switch_driver(struct dsa_switch_driver *type); diff --git a/include/net/dst.h b/include/net/dst.h index 2bc73f8a00a9..9261d928303d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -57,6 +57,7 @@ struct dst_entry {  #define DST_FAKE_RTABLE		0x0040  #define DST_XFRM_TUNNEL		0x0080  #define DST_XFRM_QUEUE		0x0100 +#define DST_METADATA		0x0200  	unsigned short		pending_confirm; @@ -83,12 +84,13 @@ struct dst_entry {  	__u32			__pad2;  #endif +#ifdef CONFIG_64BIT +	struct lwtunnel_state   *lwtstate;  	/*  	 * Align __refcnt to a 64 bytes alignment  	 * (L1_CACHE_SIZE would be too much)  	 */ -#ifdef CONFIG_64BIT -	long			__pad_to_align_refcnt[2]; +	long			__pad_to_align_refcnt[1];  #endif  	/*  	 * __refcnt wants to be on a different cache line from @@ -97,6 +99,9 @@ struct dst_entry {  	atomic_t		__refcnt;	/* client references	*/  	int			__use;  	unsigned long		lastuse; +#ifndef CONFIG_64BIT +	struct lwtunnel_state   *lwtstate; +#endif  	union {  		struct dst_entry	*next;  		struct rtable __rcu	*rt_next; @@ -202,6 +207,12 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)  		p[metric-1] = val;  } +/* Kernel-internal feature bits that are unallocated in user space. 
*/ +#define DST_FEATURE_ECN_CA	(1 << 31) + +#define DST_FEATURE_MASK	(DST_FEATURE_ECN_CA) +#define DST_FEATURE_ECN_MASK	(DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) +  static inline u32  dst_feature(const struct dst_entry *dst, u32 feature)  { @@ -284,13 +295,18 @@ static inline void skb_dst_drop(struct sk_buff *skb)  	}  } -static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) +static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)  { -	nskb->_skb_refdst = oskb->_skb_refdst; +	nskb->_skb_refdst = refdst;  	if (!(nskb->_skb_refdst & SKB_DST_NOREF))  		dst_clone(skb_dst(nskb));  } +static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) +{ +	__skb_dst_copy(nskb, oskb->_skb_refdst); +} +  /**   * skb_dst_force - makes sure skb dst is refcounted   * @skb: buffer @@ -356,6 +372,9 @@ static inline int dst_discard(struct sk_buff *skb)  }  void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,  		int initial_obsolete, unsigned short flags); +void dst_init(struct dst_entry *dst, struct dst_ops *ops, +	      struct net_device *dev, int initial_ref, int initial_obsolete, +	      unsigned short flags);  void __dst_free(struct dst_entry *dst);  struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -457,7 +476,7 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)  	return dst;  } -void dst_init(void); +void dst_subsys_init(void);  /* Flags for xfrm_lookup flags argument. 
*/  enum { diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h new file mode 100644 index 000000000000..af9d5382f6cb --- /dev/null +++ b/include/net/dst_metadata.h @@ -0,0 +1,108 @@ +#ifndef __NET_DST_METADATA_H +#define __NET_DST_METADATA_H 1 + +#include <linux/skbuff.h> +#include <net/ip_tunnels.h> +#include <net/dst.h> + +struct metadata_dst { +	struct dst_entry		dst; +	union { +		struct ip_tunnel_info	tun_info; +	} u; +}; + +static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +{ +	struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); + +	if (md_dst && md_dst->dst.flags & DST_METADATA) +		return md_dst; + +	return NULL; +} + +static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +{ +	struct metadata_dst *md_dst = skb_metadata_dst(skb); +	struct dst_entry *dst; + +	if (md_dst) +		return &md_dst->u.tun_info; + +	dst = skb_dst(skb); +	if (dst && dst->lwtstate) +		return lwt_tun_info(dst->lwtstate); + +	return NULL; +} + +static inline bool skb_valid_dst(const struct sk_buff *skb) +{ +	struct dst_entry *dst = skb_dst(skb); + +	return dst && !(dst->flags & DST_METADATA); +} + +struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); +struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); + +static inline struct metadata_dst *tun_rx_dst(int md_size) +{ +	struct metadata_dst *tun_dst; + +	tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); +	if (!tun_dst) +		return NULL; + +	tun_dst->u.tun_info.options_len = 0; +	tun_dst->u.tun_info.mode = 0; +	return tun_dst; +} + +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, +						 __be16 flags, +						 __be64 tunnel_id, +						 int md_size) +{ +	const struct iphdr *iph = ip_hdr(skb); +	struct metadata_dst *tun_dst; + +	tun_dst = tun_rx_dst(md_size); +	if (!tun_dst) +		return NULL; + +	ip_tunnel_key_init(&tun_dst->u.tun_info.key, +			   iph->saddr, iph->daddr, iph->tos, iph->ttl, +			   0, 0, tunnel_id, 
flags); +	return tun_dst; +} + +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +						 __be16 flags, +						 __be64 tunnel_id, +						 int md_size) +{ +	const struct ipv6hdr *ip6h = ipv6_hdr(skb); +	struct metadata_dst *tun_dst; +	struct ip_tunnel_info *info; + +	tun_dst = tun_rx_dst(md_size); +	if (!tun_dst) +		return NULL; + +	info = &tun_dst->u.tun_info; +	info->mode = IP_TUNNEL_INFO_IPV6; +	info->key.tun_flags = flags; +	info->key.tun_id = tunnel_id; +	info->key.tp_src = 0; +	info->key.tp_dst = 0; + +	info->key.u.ipv6.src = ip6h->saddr; +	info->key.u.ipv6.dst = ip6h->daddr; +	info->key.tos = ipv6_get_dsfield(ip6h); +	info->key.ttl = ip6h->hop_limit; +	return tun_dst; +} + +#endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 903a55efbffe..59160de702b6 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -19,6 +19,7 @@ struct fib_rule {  	u8			action;  	/* 3 bytes hole, try to use */  	u32			target; +	__be64			tun_id;  	struct fib_rule __rcu	*ctarget;  	struct net		*fr_net; @@ -65,7 +66,6 @@ struct fib_rules_ops {  					   struct nlattr **);  	int			(*fill)(struct fib_rule *, struct sk_buff *,  					struct fib_rule_hdr *); -	u32			(*default_pref)(struct fib_rules_ops *ops);  	size_t			(*nlmsg_payload)(struct fib_rule *);  	/* Called after modifications to the rules set, must flush @@ -117,5 +117,4 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,  		     struct fib_lookup_arg *);  int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,  			 u32 flags); -u32 fib_default_rule_pref(struct fib_rules_ops *ops);  #endif diff --git a/include/net/flow.h b/include/net/flow.h index 8109a159d1b3..acd6a096250e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -10,6 +10,7 @@  #include <linux/socket.h>  #include <linux/in6.h>  #include <linux/atomic.h> +#include <net/flow_dissector.h>  /*   * ifindex generation is per-net namespace, and 
loopback is @@ -19,6 +20,10 @@  #define LOOPBACK_IFINDEX	1 +struct flowi_tunnel { +	__be64			tun_id; +}; +  struct flowi_common {  	int	flowic_oif;  	int	flowic_iif; @@ -29,7 +34,9 @@ struct flowi_common {  	__u8	flowic_flags;  #define FLOWI_FLAG_ANYSRC		0x01  #define FLOWI_FLAG_KNOWN_NH		0x02 +#define FLOWI_FLAG_VRFSRC		0x04  	__u32	flowic_secid; +	struct flowi_tunnel flowic_tun_key;  };  union flowi_uli { @@ -66,6 +73,7 @@ struct flowi4 {  #define flowi4_proto		__fl_common.flowic_proto  #define flowi4_flags		__fl_common.flowic_flags  #define flowi4_secid		__fl_common.flowic_secid +#define flowi4_tun_key		__fl_common.flowic_tun_key  	/* (saddr,daddr) must be grouped, same order as in IP header */  	__be32			saddr; @@ -95,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,  	fl4->flowi4_proto = proto;  	fl4->flowi4_flags = flags;  	fl4->flowi4_secid = 0; +	fl4->flowi4_tun_key.tun_id = 0;  	fl4->daddr = daddr;  	fl4->saddr = saddr;  	fl4->fl4_dport = dport; @@ -122,6 +131,7 @@ struct flowi6 {  #define flowi6_proto		__fl_common.flowic_proto  #define flowi6_flags		__fl_common.flowic_flags  #define flowi6_secid		__fl_common.flowic_secid +#define flowi6_tun_key		__fl_common.flowic_tun_key  	struct in6_addr		daddr;  	struct in6_addr		saddr;  	__be32			flowlabel; @@ -165,6 +175,7 @@ struct flowi {  #define flowi_proto	u.__fl_common.flowic_proto  #define flowi_flags	u.__fl_common.flowic_flags  #define flowi_secid	u.__fl_common.flowic_secid +#define flowi_tun_key	u.__fl_common.flowic_tun_key  } __attribute__((__aligned__(BITS_PER_LONG/8)));  static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) @@ -233,4 +244,22 @@ void flow_cache_flush(struct net *net);  void flow_cache_flush_deferred(struct net *net);  extern atomic_t flow_cache_genid; +__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); + +static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) +{ +	struct flow_keys keys; + +	return 
__get_hash_from_flowi6(fl6, &keys); +} + +__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys); + +static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4) +{ +	struct flow_keys keys; + +	return __get_hash_from_flowi4(fl4, &keys); +} +  #endif diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1a8c22419936..8c8548cf5888 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -2,7 +2,6 @@  #define _NET_FLOW_DISSECTOR_H  #include <linux/types.h> -#include <linux/skbuff.h>  #include <linux/in6.h>  #include <uapi/linux/if_ether.h> @@ -13,8 +12,13 @@  struct flow_dissector_key_control {  	u16	thoff;  	u16	addr_type; +	u32	flags;  }; +#define FLOW_DIS_IS_FRAGMENT	BIT(0) +#define FLOW_DIS_FIRST_FRAG	BIT(1) +#define FLOW_DIS_ENCAPSULATION	BIT(2) +  /**   * struct flow_dissector_key_basic:   * @thoff: Transport header offset @@ -123,6 +127,11 @@ enum flow_dissector_key_id {  	FLOW_DISSECTOR_KEY_MAX,  }; +#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0) +#define FLOW_DISSECTOR_F_STOP_AT_L3		BIT(1) +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	BIT(2) +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		BIT(3) +  struct flow_dissector_key {  	enum flow_dissector_key_id key_id;  	size_t offset; /* offset of struct flow_dissector_key_* @@ -134,23 +143,6 @@ struct flow_dissector {  	unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];  }; -void skb_flow_dissector_init(struct flow_dissector *flow_dissector, -			     const struct flow_dissector_key *key, -			     unsigned int key_count); - -bool __skb_flow_dissect(const struct sk_buff *skb, -			struct flow_dissector *flow_dissector, -			void *target_container, -			void *data, __be16 proto, int nhoff, int hlen); - -static inline bool skb_flow_dissect(const struct sk_buff *skb, -				    struct flow_dissector *flow_dissector, -				    void *target_container) -{ -	return __skb_flow_dissect(skb, flow_dissector, target_container, -				  NULL, 0, 0, 0); -} -  struct 
flow_keys {  	struct flow_dissector_key_control control;  #define FLOW_KEYS_HASH_START_FIELD basic @@ -170,38 +162,6 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow);  extern struct flow_dissector flow_keys_dissector;  extern struct flow_dissector flow_keys_buf_dissector; -static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, -					      struct flow_keys *flow) -{ -	memset(flow, 0, sizeof(*flow)); -	return __skb_flow_dissect(skb, &flow_keys_dissector, flow, -				  NULL, 0, 0, 0); -} - -static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, -						  void *data, __be16 proto, -						  int nhoff, int hlen) -{ -	memset(flow, 0, sizeof(*flow)); -	return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, -				  data, proto, nhoff, hlen); -} - -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, -			    void *data, int hlen_proto); - -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, -					int thoff, u8 ip_proto) -{ -	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} - -u32 flow_hash_from_keys(struct flow_keys *keys); -void __skb_get_hash(struct sk_buff *skb); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, -		   const struct flow_keys *keys, int hlen); -  /* struct flow_keys_digest:   *   * This structure is used to hold a digest of the full flow keys. 
This is a @@ -217,4 +177,11 @@ struct flow_keys_digest {  void make_flow_keys_digest(struct flow_keys_digest *digest,  			   const struct flow_keys *flow); +static inline bool flow_keys_have_l4(struct flow_keys *keys) +{ +	return (keys->ports.ports || keys->tags.flow_label); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); +  #endif diff --git a/include/net/geneve.h b/include/net/geneve.h index 2a0543a1899d..3106ed6eae0d 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,40 +62,9 @@ struct genevehdr {  	struct geneve_opt options[];  }; -static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) -{ -	return (struct genevehdr *)(udp_hdr(skb) + 1); -} -  #ifdef CONFIG_INET -struct geneve_sock; - -typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); - -struct geneve_sock { -	struct list_head	list; -	geneve_rcv_t		*rcv; -	void			*rcv_data; -	struct socket		*sock; -	struct rcu_head		rcu; -	int			refcnt; -	struct udp_offload	udp_offloads; -}; - -#define GENEVE_VER 0 -#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) - -struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, -				    geneve_rcv_t *rcv, void *data, -				    bool no_share, bool ipv6); - -void geneve_sock_release(struct geneve_sock *vs); - -int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, -		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, -		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, -		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, -		    bool csum, bool xnet); +struct net_device *geneve_dev_create_fb(struct net *net, const char *name, +					u8 name_assign_type, u16 dst_port);  #endif /*ifdef CONFIG_INET */  #endif /*ifdef__NET_GENEVE_H */ diff --git a/include/net/gre.h b/include/net/gre.h index b53182018743..97eafdc47eea 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -4,6 +4,12 @@  #include <linux/skbuff.h>  #include <net/ip_tunnels.h> +struct gre_base_hdr { +	
__be16 flags; +	__be16 protocol; +}; +#define GRE_HEADER_SECTION 4 +  #define GREPROTO_CISCO		0  #define GREPROTO_PPTP		1  #define GREPROTO_MAX		2 @@ -14,91 +20,9 @@ struct gre_protocol {  	void (*err_handler)(struct sk_buff *skb, u32 info);  }; -struct gre_base_hdr { -	__be16 flags; -	__be16 protocol; -}; -#define GRE_HEADER_SECTION 4 -  int gre_add_protocol(const struct gre_protocol *proto, u8 version);  int gre_del_protocol(const struct gre_protocol *proto, u8 version); -struct gre_cisco_protocol { -	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); -	int (*err_handler)(struct sk_buff *skb, u32 info, -			   const struct tnl_ptk_info *tpi); -	u8 priority; -}; - -int gre_cisco_register(struct gre_cisco_protocol *proto); -int gre_cisco_unregister(struct gre_cisco_protocol *proto); - -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, -		      int hdr_len); - -static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb, -						  bool csum) -{ -	return iptunnel_handle_offloads(skb, csum, -					csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); -} - - -static inline int ip_gre_calc_hlen(__be16 o_flags) -{ -	int addend = 4; - -	if (o_flags&TUNNEL_CSUM) -		addend += 4; -	if (o_flags&TUNNEL_KEY) -		addend += 4; -	if (o_flags&TUNNEL_SEQ) -		addend += 4; -	return addend; -} - -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) -{ -	__be16 tflags = 0; - -	if (flags & GRE_CSUM) -		tflags |= TUNNEL_CSUM; -	if (flags & GRE_ROUTING) -		tflags |= TUNNEL_ROUTING; -	if (flags & GRE_KEY) -		tflags |= TUNNEL_KEY; -	if (flags & GRE_SEQ) -		tflags |= TUNNEL_SEQ; -	if (flags & GRE_STRICT) -		tflags |= TUNNEL_STRICT; -	if (flags & GRE_REC) -		tflags |= TUNNEL_REC; -	if (flags & GRE_VERSION) -		tflags |= TUNNEL_VERSION; - -	return tflags; -} - -static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) -{ -	__be16 flags = 0; - -	if (tflags & TUNNEL_CSUM) -		flags |= GRE_CSUM; -	if (tflags & TUNNEL_ROUTING) -		flags |= GRE_ROUTING; -	if (tflags & TUNNEL_KEY) -		flags |= GRE_KEY; -	if (tflags & TUNNEL_SEQ) -		flags |= GRE_SEQ; -	if (tflags & TUNNEL_STRICT) -		flags |= GRE_STRICT; -	if (tflags & TUNNEL_REC) -		flags |= GRE_REC; -	if (tflags & TUNNEL_VERSION) -		flags |= GRE_VERSION; - -	return flags; -} - +struct net_device *gretap_fb_dev_create(struct net *net, const char *name, +				       u8 name_assign_type);  #endif diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index 0f712c0bc0bf..cf6c74550baa 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -32,37 +32,28 @@ static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *s  		return;  	} -	/* We run in BH context */ -	spin_lock(&cell->napi_skbs.lock); -  	__skb_queue_tail(&cell->napi_skbs, skb);  	if (skb_queue_len(&cell->napi_skbs) == 1)  		napi_schedule(&cell->napi); - -	spin_unlock(&cell->napi_skbs.lock);  } -/* called unser BH context */ +/* called under BH context */  static inline int gro_cell_poll(struct napi_struct *napi, int budget)  {  	struct gro_cell *cell = 
container_of(napi, struct gro_cell, napi);  	struct sk_buff *skb;  	int work_done = 0; -	spin_lock(&cell->napi_skbs.lock);  	while (work_done < budget) {  		skb = __skb_dequeue(&cell->napi_skbs);  		if (!skb)  			break; -		spin_unlock(&cell->napi_skbs.lock);  		napi_gro_receive(napi, skb);  		work_done++; -		spin_lock(&cell->napi_skbs.lock);  	}  	if (work_done < budget) -		napi_complete(napi); -	spin_unlock(&cell->napi_skbs.lock); +		napi_complete_done(napi, work_done);  	return work_done;  } @@ -77,7 +68,7 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de  	for_each_possible_cpu(i) {  		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); -		skb_queue_head_init(&cell->napi_skbs); +		__skb_queue_head_init(&cell->napi_skbs);  		netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);  		napi_enable(&cell->napi);  	} @@ -92,8 +83,9 @@ static inline void gro_cells_destroy(struct gro_cells *gcells)  		return;  	for_each_possible_cpu(i) {  		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); +  		netif_napi_del(&cell->napi); -		skb_queue_purge(&cell->napi_skbs); +		__skb_queue_purge(&cell->napi_skbs);  	}  	free_percpu(gcells->cells);  	gcells->cells = NULL; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e1300b3dd597..53eead2da743 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,13 +21,11 @@ struct netns_frags {   * @INET_FRAG_FIRST_IN: first fragment has arrived   * @INET_FRAG_LAST_IN: final fragment has arrived   * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction - * @INET_FRAG_EVICTED: frag queue is being evicted   */  enum {  	INET_FRAG_FIRST_IN	= BIT(0),  	INET_FRAG_LAST_IN	= BIT(1),  	INET_FRAG_COMPLETE	= BIT(2), -	INET_FRAG_EVICTED	= BIT(3)  };  /** @@ -45,6 +43,7 @@ enum {   * @flags: fragment queue flags   * @max_size: maximum received fragment size   * @net: namespace that this frag belongs to + * @list_evictor: list of queues to forcefully evict 
(e.g. due to low memory)   */  struct inet_frag_queue {  	spinlock_t		lock; @@ -59,6 +58,7 @@ struct inet_frag_queue {  	__u8			flags;  	u16			max_size;  	struct netns_frags	*net; +	struct hlist_node	list_evictor;  };  #define INETFRAGS_HASHSZ	1024 @@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f  		inet_frag_destroy(q, f);  } +static inline bool inet_frag_evicting(struct inet_frag_queue *q) +{ +	return !hlist_unhashed(&q->list_evictor); +} +  /* Memory Tracking Functions. */  /* The default percpu_counter batch size is not big enough to scale to @@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf)  	return percpu_counter_read(&nf->mem);  } -static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)  { -	__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); +	__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);  } -static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, int i)  { -	__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); +	__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);  }  static inline void init_frag_mem_limit(struct netns_frags *nf) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b73c88a19dd4..b07d126694a7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);  void inet_hashinfo_init(struct inet_hashinfo *h); -int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); -int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw); +void __inet_hash_nolisten(struct sock *sk, struct sock *osk); +void __inet_hash(struct sock *sk, struct sock *osk);  void inet_hash(struct sock *sk);  void inet_unhash(struct sock *sk); diff --git 
a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 360c4802288d..879d6e5a973b 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)  void inet_twsk_free(struct inet_timewait_sock *tw);  void inet_twsk_put(struct inet_timewait_sock *tw); -int inet_twsk_unhash(struct inet_timewait_sock *tw); - -int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, -			  struct inet_hashinfo *hashinfo); +void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, +			   struct inet_hashinfo *hashinfo);  struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,  					   struct inet_timewait_death_row *dr, @@ -113,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,  			   struct inet_hashinfo *hashinfo);  void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); -void inet_twsk_deschedule(struct inet_timewait_sock *tw); +void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);  void inet_twsk_purge(struct inet_hashinfo *hashinfo,  		     struct inet_timewait_death_row *twdr, int family); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index d5332ddcea3f..4a6009d4486b 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,16 +15,20 @@  #include <net/ipv6.h>  #include <linux/atomic.h> -struct inetpeer_addr_base { -	union { -		__be32			a4; -		__be32			a6[4]; -		struct in6_addr		in6; -	}; +/* IPv4 address key for cache lookups */ +struct ipv4_addr_key { +	__be32	addr; +	int	vif;  }; +#define INETPEER_MAXKEYSZ   (sizeof(struct in6_addr) / sizeof(u32)) +  struct inetpeer_addr { -	struct inetpeer_addr_base	addr; +	union { +		struct ipv4_addr_key	a4; +		struct in6_addr		a6; +		u32			key[INETPEER_MAXKEYSZ]; +	};  	__u16				family;  }; @@ -65,69 +69,33 @@ struct inet_peer_base {  	int			total;  }; -#define INETPEER_BASE_BIT	0x1UL - -static inline struct 
inet_peer *inetpeer_ptr(unsigned long val) -{ -	BUG_ON(val & INETPEER_BASE_BIT); -	return (struct inet_peer *) val; -} +void inet_peer_base_init(struct inet_peer_base *); -static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val) -{ -	if (!(val & INETPEER_BASE_BIT)) -		return NULL; -	val &= ~INETPEER_BASE_BIT; -	return (struct inet_peer_base *) val; -} +void inet_initpeers(void) __init; -static inline bool inetpeer_ptr_is_peer(unsigned long val) -{ -	return !(val & INETPEER_BASE_BIT); -} +#define INETPEER_METRICS_NEW	(~(u32) 0) -static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer) +static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)  { -	/* This implicitly clears INETPEER_BASE_BIT */ -	*val = (unsigned long) peer; +	iaddr->a4.addr = ip; +	iaddr->family = AF_INET;  } -static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer) +static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr)  { -	unsigned long val = (unsigned long) peer; -	unsigned long orig = *ptr; - -	if (!(orig & INETPEER_BASE_BIT) || -	    cmpxchg(ptr, orig, val) != orig) -		return false; -	return true; +	return iaddr->a4.addr;  } -static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base) +static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr, +					struct in6_addr *in6)  { -	*ptr = (unsigned long) base | INETPEER_BASE_BIT; +	iaddr->a6 = *in6; +	iaddr->family = AF_INET6;  } -static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from) +static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)  { -	unsigned long val = *from; - -	*to = val; -	if (inetpeer_ptr_is_peer(val)) { -		struct inet_peer *peer = inetpeer_ptr(val); -		atomic_inc(&peer->refcnt); -	} -} - -void inet_peer_base_init(struct inet_peer_base *); - -void inet_initpeers(void) __init; - -#define INETPEER_METRICS_NEW	(~(u32) 0) - -static inline 
bool inet_metrics_new(const struct inet_peer *p) -{ -	return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW; +	return &iaddr->a6;  }  /* can be called with or without local BH being disabled */ @@ -137,11 +105,12 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,  static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,  						__be32 v4daddr, -						int create) +						int vif, int create)  {  	struct inetpeer_addr daddr; -	daddr.addr.a4 = v4daddr; +	daddr.a4.addr = v4daddr; +	daddr.a4.vif = vif;  	daddr.family = AF_INET;  	return inet_getpeer(base, &daddr, create);  } @@ -152,23 +121,36 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,  {  	struct inetpeer_addr daddr; -	daddr.addr.in6 = *v6daddr; +	daddr.a6 = *v6daddr;  	daddr.family = AF_INET6;  	return inet_getpeer(base, &daddr, create);  } +static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, +				    const struct inetpeer_addr *b) +{ +	int i, n; + +	if (a->family == AF_INET) +		n = sizeof(a->a4) / sizeof(u32); +	else +		n = sizeof(a->a6) / sizeof(u32); + +	for (i = 0; i < n; i++) { +		if (a->key[i] == b->key[i]) +			continue; +		if (a->key[i] < b->key[i]) +			return -1; +		return 1; +	} + +	return 0; +} +  /* can be called from BH context or outside */  void inet_putpeer(struct inet_peer *p);  bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);  void inetpeer_invalidate_tree(struct inet_peer_base *); -/* - * temporary check to make sure we dont access rid, tcp_ts, - * tcp_ts_stamp if no refcount is taken on inet_peer - */ -static inline void inet_peer_refcheck(const struct inet_peer *p) -{ -	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); -}  #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 0750a186ea63..9b9ca2839399 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk)  }  /* datagram.c */ +int 
__ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);  int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);  void ip4_datagram_release_cb(struct sock *sk); @@ -201,10 +202,20 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,  #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)  #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) +u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);  unsigned long snmp_fold_field(void __percpu *mib, int offt);  #if BITS_PER_LONG==32 +u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, +			 size_t syncp_offset);  u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off);  #else +static inline u64  snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, +					size_t syncp_offset) +{ +	return snmp_get_cpu_field(mib, cpu, offct); + +} +  static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off)  {  	return snmp_fold_field(mib, offt); @@ -369,22 +380,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,  	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;  } -static inline void inet_set_txhash(struct sock *sk) -{ -	struct inet_sock *inet = inet_sk(sk); -	struct flow_keys keys; - -	memset(&keys, 0, sizeof(keys)); - -	keys.addrs.v4addrs.src = inet->inet_saddr; -	keys.addrs.v4addrs.dst = inet->inet_daddr; -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; -	keys.ports.src = inet->inet_sport; -	keys.ports.dst = inet->inet_dport; - -	sk->sk_txhash = flow_hash_from_keys(&keys); -} -  static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)  {  	const struct iphdr *iph = skb_gro_network_header(skb); @@ -473,6 +468,11 @@ static __inline__ void inet_reset_saddr(struct sock *sk)  #endif +static inline unsigned int ipv4_addr_hash(__be32 ip) +{ +	return (__force 
unsigned int) ip; +} +  bool ip_call_ra_chain(struct sk_buff *skb);  /* diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3b76849c190f..063d30474cf6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -51,6 +51,8 @@ struct fib6_config {  	struct nlattr	*fc_mp;  	struct nl_info	fc_nlinfo; +	struct nlattr	*fc_encap; +	u16		fc_encap_type;  };  struct fib6_node { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 49c142bdf01e..a37d0432bebd 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -44,7 +44,9 @@ struct fib_config {  	u32			fc_flow;  	u32			fc_nlflags;  	struct nl_info		fc_nlinfo; - }; +	struct nlattr		*fc_encap; +	u16			fc_encap_type; +};  struct fib_info;  struct rtable; @@ -89,6 +91,7 @@ struct fib_nh {  	struct rtable __rcu * __percpu *nh_pcpu_rth_output;  	struct rtable __rcu	*nh_rth_input;  	struct fnhe_hash_bucket	__rcu *nh_exceptions; +	struct lwtunnel_state	*nh_lwtstate;  };  /* @@ -183,7 +186,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);  struct fib_table {  	struct hlist_node	tb_hlist;  	u32			tb_id; -	int			tb_default;  	int			tb_num_default;  	struct rcu_head		rcu;  	unsigned long 		*tb_data; @@ -290,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb);  int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,  			u8 tos, int oif, struct net_device *dev,  			struct in_device *idev, u32 *itag); -void fib_select_default(struct fib_result *res); +void fib_select_default(const struct flowi4 *flp, struct fib_result *res);  #ifdef CONFIG_IP_ROUTE_CLASSID  static inline int fib_num_tclassid_users(struct net *net)  { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index d8214cb88bbc..9a6a3ba888e8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -4,14 +4,15 @@  #include <linux/if_tunnel.h>  #include <linux/netdevice.h>  #include <linux/skbuff.h> +#include <linux/socket.h>  #include <linux/types.h>  #include 
<linux/u64_stats_sync.h>  #include <net/dsfield.h>  #include <net/gro_cells.h>  #include <net/inet_ecn.h> -#include <net/ip.h>  #include <net/netns/generic.h>  #include <net/rtnetlink.h> +#include <net/lwtunnel.h>  #if IS_ENABLED(CONFIG_IPV6)  #include <net/ipv6.h> @@ -22,6 +23,44 @@  /* Keep error state on tunnel for 30 sec */  #define IPTUNNEL_ERR_TIMEO	(30*HZ) +/* Used to memset ip_tunnel padding. */ +#define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst) + +/* Used to memset ipv4 address padding. */ +#define IP_TUNNEL_KEY_IPV4_PAD	offsetofend(struct ip_tunnel_key, u.ipv4.dst) +#define IP_TUNNEL_KEY_IPV4_PAD_LEN				\ +	(FIELD_SIZEOF(struct ip_tunnel_key, u) -		\ +	 FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4)) + +struct ip_tunnel_key { +	__be64			tun_id; +	union { +		struct { +			__be32	src; +			__be32	dst; +		} ipv4; +		struct { +			struct in6_addr src; +			struct in6_addr dst; +		} ipv6; +	} u; +	__be16			tun_flags; +	u8			tos;		/* TOS for IPv4, TC for IPv6 */ +	u8			ttl;		/* TTL for IPv4, HL for IPv6 */ +	__be16			tp_src; +	__be16			tp_dst; +}; + +/* Flags for ip_tunnel_info mode. 
*/ +#define IP_TUNNEL_INFO_TX	0x01	/* represents tx tunnel parameters */ +#define IP_TUNNEL_INFO_IPV6	0x02	/* key contains IPv6 addresses */ + +struct ip_tunnel_info { +	struct ip_tunnel_key	key; +	u8			options_len; +	u8			mode; +}; +  /* 6rd prefix/relay information */  #ifdef CONFIG_IPV6_SIT_6RD  struct ip_tunnel_6rd_parm { @@ -33,8 +72,8 @@ struct ip_tunnel_6rd_parm {  #endif  struct ip_tunnel_encap { -	__u16			type; -	__u16			flags; +	u16			type; +	u16			flags;  	__be16			sport;  	__be16			dport;  }; @@ -51,6 +90,8 @@ struct ip_tunnel_dst {  	__be32				 saddr;  }; +struct metadata_dst; +  struct ip_tunnel {  	struct ip_tunnel __rcu	*next;  	struct hlist_node hash_node; @@ -62,8 +103,8 @@ struct ip_tunnel {  					 * arrived */  	/* These four fields used only by GRE */ -	__u32		i_seqno;	/* The last seen seqno	*/ -	__u32		o_seqno;	/* The last output seqno */ +	u32		i_seqno;	/* The last seen seqno	*/ +	u32		o_seqno;	/* The last output seqno */  	int		tun_hlen;	/* Precalculated header length */  	int		mlink; @@ -84,6 +125,7 @@ struct ip_tunnel {  	unsigned int		prl_count;	/* # of entries in PRL */  	int			ip_tnl_net_id;  	struct gro_cells	gro_cells; +	bool			collect_md;  };  #define TUNNEL_CSUM		__cpu_to_be16(0x01) @@ -118,6 +160,7 @@ struct tnl_ptk_info {  struct ip_tunnel_net {  	struct net_device *fb_tunnel_dev;  	struct hlist_head tunnels[IP_TNL_HASH_SIZE]; +	struct ip_tunnel __rcu *collect_md_tun;  };  struct ip_tunnel_encap_ops { @@ -136,6 +179,40 @@ int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,  int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,  			    unsigned int num); +static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, +				      __be32 saddr, __be32 daddr, +				      u8 tos, u8 ttl, +				      __be16 tp_src, __be16 tp_dst, +				      __be64 tun_id, __be16 tun_flags) +{ +	key->tun_id = tun_id; +	key->u.ipv4.src = saddr; +	key->u.ipv4.dst = daddr; +	memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, +	   
    0, IP_TUNNEL_KEY_IPV4_PAD_LEN); +	key->tos = tos; +	key->ttl = ttl; +	key->tun_flags = tun_flags; + +	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of +	 * the upper tunnel are used. +	 * E.g: GRE over IPSEC, the tp_src and tp_dst are zero. +	 */ +	key->tp_src = tp_src; +	key->tp_dst = tp_dst; + +	/* Clear struct padding. */ +	if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) +		memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, +		       0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); +} + +static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info +					       *tun_info) +{ +	return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; +} +  #ifdef CONFIG_INET  int ip_tunnel_init(struct net_device *dev); @@ -163,7 +240,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,  				   __be32 key);  int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, -		  const struct tnl_ptk_info *tpi, bool log_ecn_error); +		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, +		  bool log_ecn_error);  int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],  			 struct ip_tunnel_parm *p);  int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], @@ -196,8 +274,8 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,  int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);  int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, -		  __be32 src, __be32 dst, __u8 proto, -		  __u8 tos, __u8 ttl, __be16 df, bool xnet); +		  __be32 src, __be32 dst, u8 proto, +		  u8 tos, u8 ttl, __be16 df, bool xnet);  struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,  					 int gso_type_mask); @@ -221,6 +299,57 @@ static inline void iptunnel_xmit_stats(int err,  	}  } +static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info) +{ +	return info + 1; +} + +static inline void ip_tunnel_info_opts_get(void *to, +					   const 
struct ip_tunnel_info *info) +{ +	memcpy(to, info + 1, info->options_len); +} + +static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, +					   const void *from, int len) +{ +	memcpy(ip_tunnel_info_opts(info), from, len); +	info->options_len = len; +} + +static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) +{ +	return (struct ip_tunnel_info *)lwtstate->data; +} + +extern struct static_key ip_tunnel_metadata_cnt; + +/* Returns > 0 if metadata should be collected */ +static inline int ip_tunnel_collect_metadata(void) +{ +	return static_key_false(&ip_tunnel_metadata_cnt); +} + +void __init ip_tunnel_core_init(void); + +void ip_tunnel_need_metadata(void); +void ip_tunnel_unneed_metadata(void); + +#else /* CONFIG_INET */ + +static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) +{ +	return NULL; +} + +static inline void ip_tunnel_need_metadata(void) +{ +} + +static inline void ip_tunnel_unneed_metadata(void) +{ +} +  #endif /* CONFIG_INET */  #endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4e3731ee4eac..9b9ca87a4210 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -846,6 +846,17 @@ struct ipvs_master_sync_state {  /* How much time to keep dests in trash */  #define IP_VS_DEST_TRASH_PERIOD		(120 * HZ) +struct ipvs_sync_daemon_cfg { +	union nf_inet_addr	mcast_group; +	int			syncid; +	u16			sync_maxlen; +	u16			mcast_port; +	u8			mcast_af; +	u8			mcast_ttl; +	/* multicast interface name */ +	char			mcast_ifn[IP_VS_IFNAME_MAXLEN]; +}; +  /* IPVS in network namespace */  struct netns_ipvs {  	int			gen;		/* Generation */ @@ -961,15 +972,10 @@ struct netns_ipvs {  	spinlock_t		sync_buff_lock;  	struct task_struct	**backup_threads;  	int			threads_mask; -	int			send_mesg_maxlen; -	int			recv_mesg_maxlen;  	volatile int		sync_state; -	volatile int		master_syncid; -	volatile int		backup_syncid;  	struct mutex		sync_mutex; -	/* multicast interface 
name */ -	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; +	struct ipvs_sync_daemon_cfg	mcfg;	/* Master Configuration */ +	struct ipvs_sync_daemon_cfg	bcfg;	/* Backup Configuration */  	/* net name space ptr */  	struct net		*net;            /* Needed by timer routines */  	/* Number of heterogeneous destinations, needed because heterogeneous @@ -1408,7 +1414,8 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)  /* IPVS sync daemon data and function prototypes   * (from ip_vs_sync.c)   */ -int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); +int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *cfg, +		      int state);  int stop_sync_thread(struct net *net, int state);  void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 82dbdb092a5d..711cca428cc8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -707,54 +707,69 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,  }  #if IS_ENABLED(CONFIG_IPV6) -static inline void ip6_set_txhash(struct sock *sk) -{ -	struct inet_sock *inet = inet_sk(sk); -	struct ipv6_pinfo *np = inet6_sk(sk); -	struct flow_keys keys; -	memset(&keys, 0, sizeof(keys)); +/* Sysctl settings for net ipv6.auto_flowlabels */ +#define IP6_AUTO_FLOW_LABEL_OFF		0 +#define IP6_AUTO_FLOW_LABEL_OPTOUT	1 +#define IP6_AUTO_FLOW_LABEL_OPTIN	2 +#define IP6_AUTO_FLOW_LABEL_FORCED	3 -	memcpy(&keys.addrs.v6addrs.src, &np->saddr, -	       sizeof(keys.addrs.v6addrs.src)); -	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr, -	       sizeof(keys.addrs.v6addrs.dst)); -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; -	keys.ports.src = inet->inet_sport; -	keys.ports.dst = inet->inet_dport; +#define IP6_AUTO_FLOW_LABEL_MAX		IP6_AUTO_FLOW_LABEL_FORCED -	sk->sk_txhash = flow_hash_from_keys(&keys); -} +#define IP6_DEFAULT_AUTO_FLOW_LABELS	
IP6_AUTO_FLOW_LABEL_OPTOUT  static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, -					__be32 flowlabel, bool autolabel) +					__be32 flowlabel, bool autolabel, +					struct flowi6 *fl6)  { -	if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { -		u32 hash; +	u32 hash; + +	if (flowlabel || +	    net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || +	    (!autolabel && +	     net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) +		return flowlabel; -		hash = skb_get_hash(skb); +	hash = skb_get_hash_flowi6(skb, fl6); -		/* Since this is being sent on the wire obfuscate hash a bit -		 * to minimize possbility that any useful information to an -		 * attacker is leaked. Only lower 20 bits are relevant. -		 */ -		hash ^= hash >> 12; +	/* Since this is being sent on the wire obfuscate hash a bit +	 * to minimize possibility that any useful information to an +	 * attacker is leaked. Only lower 20 bits are relevant. +	 */ +	hash = rol32(hash, 16); -		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; +	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; -		if (net->ipv6.sysctl.flowlabel_state_ranges) -			flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; -	} +	if (net->ipv6.sysctl.flowlabel_state_ranges) +		flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG;  	return flowlabel;  } + +static inline int ip6_default_np_autolabel(struct net *net) +{ +	switch (net->ipv6.sysctl.auto_flowlabels) { +	case IP6_AUTO_FLOW_LABEL_OFF: +	case IP6_AUTO_FLOW_LABEL_OPTIN: +	default: +		return 0; +	case IP6_AUTO_FLOW_LABEL_OPTOUT: +	case IP6_AUTO_FLOW_LABEL_FORCED: +		return 1; +	} +}  #else  static inline void ip6_set_txhash(struct sock *sk) { }  static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, -					__be32 flowlabel, bool autolabel) +					__be32 flowlabel, bool autolabel, +					struct flowi6 *fl6)  {  	return flowlabel;  } +static inline int ip6_default_np_autolabel(struct net *net) +{ +	return 0; +}  #endif @@ 
-832,7 +847,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)  			      &inet6_sk(sk)->cork);  } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, +		   struct flowi6 *fl6);  struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,  				      const struct in6_addr *final_dst);  struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h new file mode 100644 index 000000000000..fce0e35e74d0 --- /dev/null +++ b/include/net/lwtunnel.h @@ -0,0 +1,175 @@ +#ifndef __NET_LWTUNNEL_H +#define __NET_LWTUNNEL_H 1 + +#include <linux/lwtunnel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/route.h> + +#define LWTUNNEL_HASH_BITS   7 +#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS) + +/* lw tunnel state flags */ +#define LWTUNNEL_STATE_OUTPUT_REDIRECT	BIT(0) +#define LWTUNNEL_STATE_INPUT_REDIRECT	BIT(1) + +struct lwtunnel_state { +	__u16		type; +	__u16		flags; +	atomic_t	refcnt; +	int		(*orig_output)(struct sock *sk, struct sk_buff *skb); +	int		(*orig_input)(struct sk_buff *); +	int             len; +	__u8            data[0]; +}; + +struct lwtunnel_encap_ops { +	int (*build_state)(struct net_device *dev, struct nlattr *encap, +			   unsigned int family, const void *cfg, +			   struct lwtunnel_state **ts); +	int (*output)(struct sock *sk, struct sk_buff *skb); +	int (*input)(struct sk_buff *skb); +	int (*fill_encap)(struct sk_buff *skb, +			  struct lwtunnel_state *lwtstate); +	int (*get_encap_size)(struct lwtunnel_state *lwtstate); +	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); +}; + +#ifdef CONFIG_LWTUNNEL +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ +	kfree(lws); +} + +static inline struct lwtunnel_state * +lwtstate_get(struct lwtunnel_state *lws) +{ +	
if (lws) +		atomic_inc(&lws->refcnt); + +	return lws; +} + +static inline void lwtstate_put(struct lwtunnel_state *lws) +{ +	if (!lws) +		return; + +	if (atomic_dec_and_test(&lws->refcnt)) +		lwtstate_free(lws); +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ +	if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) +		return true; + +	return false; +} + +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ +	if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) +		return true; + +	return false; +} +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, +			   unsigned int num); +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, +			   unsigned int num); +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +			 struct nlattr *encap, +			 unsigned int family, const void *cfg, +			 struct lwtunnel_state **lws); +int lwtunnel_fill_encap(struct sk_buff *skb, +			struct lwtunnel_state *lwtstate); +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); +int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); +int lwtunnel_output(struct sock *sk, struct sk_buff *skb); +int lwtunnel_input(struct sk_buff *skb); + +#else + +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ +} + +static inline struct lwtunnel_state * +lwtstate_get(struct lwtunnel_state *lws) +{ +	return lws; +} + +static inline void lwtstate_put(struct lwtunnel_state *lws) +{ +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ +	return false; +} + +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ +	return false; +} + +static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, +					 unsigned int num) +{ +	return -EOPNOTSUPP; + +} + +static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops 
*op, +					 unsigned int num) +{ +	return -EOPNOTSUPP; +} + +static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, +				       struct nlattr *encap, +				       unsigned int family, const void *cfg, +				       struct lwtunnel_state **lws) +{ +	return -EOPNOTSUPP; +} + +static inline int lwtunnel_fill_encap(struct sk_buff *skb, +				      struct lwtunnel_state *lwtstate) +{ +	return 0; +} + +static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) +{ +	return 0; +} + +static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) +{ +	return NULL; +} + +static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, +				     struct lwtunnel_state *b) +{ +	return 0; +} + +static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +{ +	return -EOPNOTSUPP; +} + +static inline int lwtunnel_input(struct sk_buff *skb) +{ +	return -EOPNOTSUPP; +} + +#endif + +#endif /* __NET_LWTUNNEL_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6b1077c2a63f..bfc569498bfa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -477,7 +477,9 @@ struct ieee80211_event {   * @chandef: Channel definition for this BSS -- the hardware might be   *	configured a higher bandwidth than this BSS uses, for example.   * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. - *	This field is only valid when the channel type is one of the HT types. + *	This field is only valid when the channel is a wide HT/VHT channel. + *	Note that with TDLS this can be the case (channel is HT, protection must + *	be used from this field) even when the BSS association isn't using HT.   * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value   *	implies disabled   * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis @@ -973,6 +975,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)   * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame.   
*	If this flag is set, the stack cannot do any replay detection   *	hence the driver or hardware will have to do that. + * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this + *	flag indicates that the PN was verified for replay protection. + *	Note that this flag is also currently only supported when a frame + *	is also decrypted (ie. @RX_FLAG_DECRYPTED must be set)   * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on   *	the frame.   * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PLCP check failed on @@ -997,9 +1003,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)   * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference   *	number (@ampdu_reference) must be populated and be a distinct number for   *	each A-MPDU - * @RX_FLAG_AMPDU_REPORT_ZEROLEN: driver reports 0-length subframes - * @RX_FLAG_AMPDU_IS_ZEROLEN: This is a zero-length subframe, for - *	monitoring purposes only   * @RX_FLAG_AMPDU_LAST_KNOWN: last subframe is known, should be set on all   *	subframes of a single A-MPDU   * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU @@ -1039,8 +1042,8 @@ enum mac80211_rx_flags {  	RX_FLAG_NO_SIGNAL_VAL		= BIT(12),  	RX_FLAG_HT_GF			= BIT(13),  	RX_FLAG_AMPDU_DETAILS		= BIT(14), -	RX_FLAG_AMPDU_REPORT_ZEROLEN	= BIT(15), -	RX_FLAG_AMPDU_IS_ZEROLEN	= BIT(16), +	RX_FLAG_PN_VALIDATED		= BIT(15), +	/* bit 16 free */  	RX_FLAG_AMPDU_LAST_KNOWN	= BIT(17),  	RX_FLAG_AMPDU_IS_LAST		= BIT(18),  	RX_FLAG_AMPDU_DELIM_CRC_ERROR	= BIT(19), @@ -1491,8 +1494,10 @@ enum ieee80211_key_flags {   * 	- Temporal Authenticator Rx MIC Key (64 bits)   * @icv_len: The ICV length for this key type   * @iv_len: The IV length for this key type + * @drv_priv: pointer for driver use   */  struct ieee80211_key_conf { +	void *drv_priv;  	atomic64_t tx_pn;  	u32 cipher;  	u8 icv_len; @@ -1675,7 +1680,6 @@ struct ieee80211_sta_rates {   * @tdls: indicates whether the STA is a TDLS peer   * 
@tdls_initiator: indicates the STA is an initiator of the TDLS link. Only   *	valid if the STA is a TDLS peer in the first place. - * @mfp: indicates whether the STA uses management frame protection or not.   * @txq: per-TID data TX queues (if driver uses the TXQ abstraction)   */  struct ieee80211_sta { @@ -1693,7 +1697,6 @@ struct ieee80211_sta {  	struct ieee80211_sta_rates __rcu *rates;  	bool tdls;  	bool tdls_initiator; -	bool mfp;  	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; @@ -1888,6 +1891,9 @@ struct ieee80211_txq {   * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands   *	in one command, mac80211 doesn't have to run separate scans per band.   * + * @IEEE80211_HW_TDLS_WIDER_BW: The device/driver supports wider bandwidth + *	than the BSS bandwidth for a TDLS link on the base channel. + *   * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays   */  enum ieee80211_hw_flags { @@ -1920,6 +1926,7 @@ enum ieee80211_hw_flags {  	IEEE80211_HW_CHANCTX_STA_CSA,  	IEEE80211_HW_SUPPORTS_CLONED_SKBS,  	IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS, +	IEEE80211_HW_TDLS_WIDER_BW,  	/* keep last, obviously */  	NUM_IEEE80211_HW_FLAGS @@ -3696,20 +3703,28 @@ void ieee80211_free_hw(struct ieee80211_hw *hw);  void ieee80211_restart_hw(struct ieee80211_hw *hw);  /** - * ieee80211_napi_add - initialize mac80211 NAPI context - * @hw: the hardware to initialize the NAPI context on - * @napi: the NAPI context to initialize - * @napi_dev: dummy NAPI netdevice, here to not waste the space if the - *	driver doesn't use NAPI - * @poll: poll function - * @weight: default weight + * ieee80211_rx_napi - receive frame from NAPI context + * + * Use this function to hand received frames to mac80211. The receive + * buffer in @skb must start with an IEEE 802.11 header. 
In case a + * paged @skb is used, the driver is recommended to put the ieee80211 + * header of the frame on the linear part of the @skb to avoid memory + * allocation and/or memcpy by the stack. + * + * This function may not be called in IRQ context. Calls to this function + * for a single hardware must be synchronized against each other. Calls to + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be + * mixed for a single hardware. Must not run concurrently with + * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * + * This function must be called with BHs disabled.   * - * See also netif_napi_add(). + * @hw: the hardware this frame came in on + * @skb: the buffer to receive, owned by mac80211 after this call + * @napi: the NAPI context   */ -void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, -			struct net_device *napi_dev, -			int (*poll)(struct napi_struct *, int), -			int weight); +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, +		       struct napi_struct *napi);  /**   * ieee80211_rx - receive frame @@ -3731,7 +3746,10 @@ void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi,   * @hw: the hardware this frame came in on   * @skb: the buffer to receive, owned by mac80211 after this call   */ -void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); +static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) +{ +	ieee80211_rx_napi(hw, skb, NULL); +}  /**   * ieee80211_rx_irqsafe - receive frame @@ -4315,19 +4333,6 @@ void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf,  			    struct sk_buff *skb, u8 *p2k);  /** - * ieee80211_aes_cmac_calculate_k1_k2 - calculate the AES-CMAC sub keys - * - * This function computes the two AES-CMAC sub-keys, based on the - * previously installed master key. 
- * - * @keyconf: the parameter passed with the set key - * @k1: a buffer to be filled with the 1st sub-key - * @k2: a buffer to be filled with the 2nd sub-key - */ -void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, -					u8 *k1, u8 *k2); - -/**   * ieee80211_get_key_tx_seq - get key TX sequence counter   *   * @keyconf: the parameter passed with the set key diff --git a/include/net/mac802154.h b/include/net/mac802154.h index f534a46911dc..b7f99615224b 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -321,23 +321,6 @@ int ieee802154_register_hw(struct ieee802154_hw *hw);  void ieee802154_unregister_hw(struct ieee802154_hw *hw);  /** - * ieee802154_rx - receive frame - * - * Use this function to hand received frames to mac802154. The receive - * buffer in @skb must start with an IEEE 802.15.4 header. In case of a - * paged @skb is used, the driver is recommended to put the ieee802154 - * header of the frame on the linear part of the @skb to avoid memory - * allocation and/or memcpy by the stack. - * - * This function may not be called in IRQ context. Calls to this function - * for a single hardware must be synchronized against each other. - * - * @hw: the hardware this frame came in on - * @skb: the buffer to receive, owned by mac802154 after this call - */ -void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb); - -/**   * ieee802154_rx_irqsafe - receive frame   *   * Like ieee802154_rx() but can be called in IRQ context diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h new file mode 100644 index 000000000000..4757997f76ed --- /dev/null +++ b/include/net/mpls_iptunnel.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 Cumulus Networks, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _NET_MPLS_IPTUNNEL_H +#define _NET_MPLS_IPTUNNEL_H 1 + +#define MAX_NEW_LABELS 2 + +struct mpls_iptunnel_encap { +	u32	label[MAX_NEW_LABELS]; +	u32	labels; +}; + +static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) +{ +	return (struct mpls_iptunnel_encap *)lwtstate->data; +} + +#endif diff --git a/include/net/ndisc.h b/include/net/ndisc.h index b3a7751251b4..aba5695fadb0 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -182,7 +182,8 @@ int ndisc_rcv(struct sk_buff *skb);  void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,  		   const struct in6_addr *solicit, -		   const struct in6_addr *daddr, const struct in6_addr *saddr); +		   const struct in6_addr *daddr, const struct in6_addr *saddr, +		   struct sk_buff *oskb);  void ndisc_send_rs(struct net_device *dev,  		   const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index bd33e66f49aa..8b683841e574 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -125,6 +125,7 @@ struct neigh_statistics {  	unsigned long forced_gc_runs;	/* number of forced GC runs */  	unsigned long unres_discards;	/* number of unresolved drops */ +	unsigned long table_fulls;      /* times even gc couldn't help */  };  #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e951453e0a23..2dcea635ecce 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -118,6 +118,9 @@ struct net {  #endif  	struct sock		*nfnl;  	struct sock		*nfnl_stash; +#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) +	struct 
list_head        nfnl_acct_list; +#endif  #endif  #ifdef CONFIG_WEXT_CORE  	struct sk_buff_head	wext_nlevents; diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index bab824bde92c..d4c6b5f30acd 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -59,7 +59,7 @@ static inline unsigned int  br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,  		       const struct nf_hook_state *state)  { -	return NF_DROP; +	return NF_ACCEPT;  }  #endif diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h new file mode 100644 index 000000000000..42008f10dfc4 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV4_H_ +#define _NF_DUP_IPV4_H_ + +void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, +		 const struct in_addr *gw, int oif); + +#endif /* _NF_DUP_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h new file mode 100644 index 000000000000..ed6bd66fa5a0 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV6_H_ +#define _NF_DUP_IPV6_H_ + +void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, +		 const struct in6_addr *gw, int oif); + +#endif /* _NF_DUP_IPV6_H_ */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 095433b8a8b0..e8ad46834df8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits);  void nf_ct_iterate_cleanup(struct net *net,  			   int (*iter)(struct nf_conn *i, void *data),  			   void *data, u32 portid, int report); + +struct nf_conntrack_zone; +  void nf_conntrack_free(struct nf_conn *ct); -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, +				   
const struct nf_conntrack_zone *zone,  				   const struct nf_conntrack_tuple *orig,  				   const struct nf_conntrack_tuple *repl,  				   gfp_t gfp); @@ -291,7 +295,10 @@ extern unsigned int nf_conntrack_max;  extern unsigned int nf_conntrack_hash_rnd;  void init_nf_conntrack_hash_rnd(void); -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, +				 const struct nf_conntrack_zone *zone, +				 gfp_t flags); +void nf_ct_tmpl_free(struct nf_conn *tmpl);  #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)  #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index f2f0fa3bb150..c03f9c42b3cd 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,  /* Find a connection corresponding to a tuple. 
*/  struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, +		      const struct nf_conntrack_zone *zone,  		      const struct nf_conntrack_tuple *tuple);  int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 3f3aecbc8632..dce56f09ac9a 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -4,7 +4,9 @@  #ifndef _NF_CONNTRACK_EXPECT_H  #define _NF_CONNTRACK_EXPECT_H +  #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_zones.h>  extern unsigned int nf_ct_expect_hsize;  extern unsigned int nf_ct_expect_max; @@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void);  void nf_conntrack_expect_fini(void);  struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, +		    const struct nf_conntrack_zone *zone,  		    const struct nf_conntrack_tuple *tuple);  struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, +		      const struct nf_conntrack_zone *zone,  		      const struct nf_conntrack_tuple *tuple);  struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, +		       const struct nf_conntrack_zone *zone,  		       const struct nf_conntrack_tuple *tuple);  void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index dec6336bf850..7e2b1d025f50 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -54,7 +54,11 @@ int nf_connlabels_replace(struct nf_conn *ct,  #ifdef CONFIG_NF_CONNTRACK_LABELS  int nf_conntrack_labels_init(void);  void nf_conntrack_labels_fini(void); +int 
nf_connlabels_get(struct net *net, unsigned int n_bits); +void nf_connlabels_put(struct net *net);  #else  static inline int nf_conntrack_labels_init(void) { return 0; }  static inline void nf_conntrack_labels_fini(void) {} +static inline int nf_connlabels_get(struct net *net, unsigned int n_bits) { return 0; } +static inline void nf_connlabels_put(struct net *net) {}  #endif diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 034efe8d45a5..4e32512cef32 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,25 +1,89 @@  #ifndef _NF_CONNTRACK_ZONES_H  #define _NF_CONNTRACK_ZONES_H -#define NF_CT_DEFAULT_ZONE	0 +#include <linux/netfilter/nf_conntrack_zones_common.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  #include <net/netfilter/nf_conntrack_extend.h> -struct nf_conntrack_zone { -	u16	id; -}; +static inline const struct nf_conntrack_zone * +nf_ct_zone(const struct nf_conn *ct) +{ +	const struct nf_conntrack_zone *nf_ct_zone = NULL; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +	nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); +#endif +	return nf_ct_zone ? 
nf_ct_zone : &nf_ct_zone_dflt; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) +{ +	zone->id = id; +	zone->flags = flags; +	zone->dir = dir; + +	return zone; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, +		struct nf_conntrack_zone *tmp) +{ +	const struct nf_conntrack_zone *zone; + +	if (!tmpl) +		return &nf_ct_zone_dflt; + +	zone = nf_ct_zone(tmpl); +	if (zone->flags & NF_CT_FLAG_MARK) +		zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); + +	return zone; +} -static inline u16 nf_ct_zone(const struct nf_conn *ct) +static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, +				 const struct nf_conntrack_zone *info)  {  #ifdef CONFIG_NF_CONNTRACK_ZONES  	struct nf_conntrack_zone *nf_ct_zone; -	nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); -	if (nf_ct_zone) -		return nf_ct_zone->id; + +	nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); +	if (!nf_ct_zone) +		return -ENOMEM; + +	nf_ct_zone_init(nf_ct_zone, info->id, info->dir, +			info->flags);  #endif -	return NF_CT_DEFAULT_ZONE; +	return 0;  } -#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */ +static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, +					  enum ip_conntrack_dir dir) +{ +	return zone->dir & (1 << dir); +} + +static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, +				enum ip_conntrack_dir dir) +{ +	return nf_ct_zone_matches_dir(zone, dir) ? 
+	       zone->id : NF_CT_DEFAULT_ZONE_ID; +} + +static inline bool nf_ct_zone_equal(const struct nf_conn *a, +				    const struct nf_conntrack_zone *b, +				    enum ip_conntrack_dir dir) +{ +	return nf_ct_zone_id(nf_ct_zone(a), dir) == +	       nf_ct_zone_id(b, dir); +} + +static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, +					const struct nf_conntrack_zone *b) +{ +	return nf_ct_zone(a)->id == b->id; +} +#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */  #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2a246680a6c3..aa8bee72c9d3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)  static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)  { -	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; +	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;  }  unsigned int nft_parse_register(const struct nlattr *attr); diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h new file mode 100644 index 000000000000..6b84cf6491a2 --- /dev/null +++ b/include/net/netfilter/nft_dup.h @@ -0,0 +1,9 @@ +#ifndef _NFT_DUP_H_ +#define _NFT_DUP_H_ + +struct nft_dup_inet { +	enum nft_registers	sreg_addr:8; +	enum nft_registers	sreg_dev:8; +}; + +#endif /* _NFT_DUP_H_ */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 29d6a94db54d..723b61c82b3f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -68,7 +68,6 @@ struct ct_pcpu {  	spinlock_t		lock;  	struct hlist_nulls_head unconfirmed;  	struct hlist_nulls_head dying; -	struct hlist_nulls_head tmpl;  };  struct netns_ct { diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 8d93544a2d2b..c0368db6df54 100644 --- a/include/net/netns/ipv6.h +++ 
b/include/net/netns/ipv6.h @@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 {  	int auto_flowlabels;  	int icmpv6_time;  	int anycast_src_echo_reply; +	int ip_nonlocal_bind;  	int fwmark_reflect;  	int idgen_retries;  	int idgen_delay; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 532e4ba64f49..38aa4983e2a9 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -14,5 +14,6 @@ struct netns_nf {  #ifdef CONFIG_SYSCTL  	struct ctl_table_header *nf_log_dir_header;  #endif +	struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];  };  #endif diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 01fc8c531115..d0d0f1e53bb9 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -79,6 +79,7 @@ struct nci_ops {  	int   (*close)(struct nci_dev *ndev);  	int   (*send)(struct nci_dev *ndev, struct sk_buff *skb);  	int   (*setup)(struct nci_dev *ndev); +	int   (*post_setup)(struct nci_dev *ndev);  	int   (*fw_download)(struct nci_dev *ndev, const char *firmware_name);  	__u32 (*get_rfprotocol)(struct nci_dev *ndev, __u8 rf_protocol);  	int   (*discover_se)(struct nci_dev *ndev); @@ -277,6 +278,8 @@ int nci_request(struct nci_dev *ndev,  			    unsigned long opt),  		unsigned long opt, __u32 timeout);  int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload); +int nci_core_reset(struct nci_dev *ndev); +int nci_core_init(struct nci_dev *ndev);  int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);  int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index f9e58ae45f9c..30afc9a6718c 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -203,6 +203,7 @@ struct nfc_dev {  	int n_vendor_cmds;  	struct nfc_ops *ops; +	struct genl_info *cur_cmd_info;  };  #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) @@ -318,4 +319,44 @@ static inline int 
nfc_set_vendor_cmds(struct nfc_dev *dev,  	return 0;  } +struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, +						 enum nfc_attrs attr, +						 u32 oui, u32 subcmd, +						 int approxlen); +int nfc_vendor_cmd_reply(struct sk_buff *skb); + +/** + * nfc_vendor_cmd_alloc_reply_skb - allocate vendor command reply + * @dev: nfc device + * @oui: vendor oui + * @approxlen: an upper bound of the length of the data that will + *      be put into the skb + * + * This function allocates and pre-fills an skb for a reply to + * a vendor command. Since it is intended for a reply, calling + * it outside of a vendor command's doit() operation is invalid. + * + * The returned skb is pre-filled with some identifying data in + * a way that any data that is put into the skb (with skb_put(), + * nla_put() or similar) will end up being within the + * %NFC_ATTR_VENDOR_DATA attribute, so all that needs to be done + * with the skb is adding data for the corresponding userspace tool + * which can then read that data out of the vendor data attribute. + * You must not modify the skb in any other way. + * + * When done, call nfc_vendor_cmd_reply() with the skb and return + * its error code as the result of the doit() operation. + * + * Return: An allocated and pre-filled skb. %NULL if any errors happen. 
+ */ +static inline struct sk_buff * +nfc_vendor_cmd_alloc_reply_skb(struct nfc_dev *dev, +				u32 oui, u32 subcmd, int approxlen) +{ +	return __nfc_alloc_vendor_cmd_reply_skb(dev, +						NFC_ATTR_VENDOR_DATA, +						oui, +						subcmd, approxlen); +} +  #endif /* __NET_NFC_H */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index b0ab530d28cd..cf2713d8b975 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -52,6 +52,8 @@ enum nl802154_commands {  	NL802154_CMD_SET_LBT_MODE, +	NL802154_CMD_SET_ACKREQ_DEFAULT, +  	/* add new commands above here */  	/* used to define NL802154_CMD_MAX below */ @@ -104,6 +106,8 @@ enum nl802154_attrs {  	NL802154_ATTR_SUPPORTED_COMMANDS, +	NL802154_ATTR_ACKREQ_DEFAULT, +  	/* add attributes here, update the policy in nl802154.c */  	__NL802154_ATTR_AFTER_LAST, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2342bf12cb78..401038d2f9b8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -110,10 +110,8 @@ static inline void qdisc_run(struct Qdisc *q)  		__qdisc_run(q);  } -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, -		       struct tcf_result *res);  int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, -		struct tcf_result *res); +		struct tcf_result *res, bool compat_mode);  static inline __be16 tc_skb_protocol(const struct sk_buff *skb)  { diff --git a/include/net/route.h b/include/net/route.h index fe22d03afb6a..cc61cb95f059 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -188,8 +188,12 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);  void ip_rt_send_redirect(struct sk_buff *skb);  unsigned int inet_addr_type(struct net *net, __be32 addr); +unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id);  unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,  				__be32 addr); +unsigned int inet_addr_type_dev_table(struct net *net, +				      const struct 
net_device *dev, +				      __be32 addr);  void ip_rt_multicast_event(struct in_device *);  int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);  void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); @@ -250,6 +254,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32  	if (inet_sk(sk)->transparent)  		flow_flags |= FLOWI_FLAG_ANYSRC; +	if (netif_index_is_vrf(sock_net(sk), oif)) +		flow_flags |= FLOWI_FLAG_VRFSRC; +  	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,  			   protocol, flow_flags, dst, src, dport, sport);  } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 343d922d15c2..18fdb98185ab 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,  				    unsigned char name_assign_type,  				    const struct rtnl_link_ops *ops,  				    struct nlattr *tb[]); +int rtnl_delete_link(struct net_device *dev);  int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);  int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2738f6f87908..444faa89a55f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -340,6 +340,7 @@ extern struct Qdisc noop_qdisc;  extern struct Qdisc_ops noop_qdisc_ops;  extern struct Qdisc_ops pfifo_fast_ops;  extern struct Qdisc_ops mq_qdisc_ops; +extern struct Qdisc_ops noqueue_qdisc_ops;  extern const struct Qdisc_ops *default_qdisc_ops;  struct Qdisc_class_common { @@ -513,17 +514,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,  	bstats->packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1;  } -static inline void qdisc_bstats_update_cpu(struct Qdisc *sch, -					   const struct sk_buff *skb) +static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, +				     const struct sk_buff *skb)  { -	struct gnet_stats_basic_cpu *bstats = -				this_cpu_ptr(sch->cpu_bstats); -  	u64_stats_update_begin(&bstats->syncp);  	bstats_update(&bstats->bstats, skb);  	u64_stats_update_end(&bstats->syncp);  } +static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, +					   const struct sk_buff *skb) +{ +	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); +} +  static inline void qdisc_bstats_update(struct Qdisc *sch,  				       const struct sk_buff *skb)  { @@ -547,16 +551,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)  	sch->qstats.drops += count;  } -static inline void qdisc_qstats_drop(struct Qdisc *sch) +static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)  { -	sch->qstats.drops++; +	qstats->drops++;  } -static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch) +static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)  { -	struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats); +	qstats->overlimits++; +} -	qstats->drops++; +static inline void qdisc_qstats_drop(struct Qdisc *sch) +{ +	qstats_drop_inc(&sch->qstats); +} + +static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) +{ +	qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));  }  static inline void qdisc_qstats_overlimit(struct Qdisc *sch) diff --git a/include/net/sock.h b/include/net/sock.h index 05a8c1aea251..7aa78440559a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -429,7 +429,9 @@ struct sock {  	void			*sk_security;  #endif  	__u32			sk_mark; +#ifdef CONFIG_CGROUP_NET_CLASSID  	u32			sk_classid; +#endif  	struct cg_proto		*sk_cgrp;  	void			(*sk_state_change)(struct sock *sk);  	void			(*sk_data_ready)(struct sock *sk); @@ -902,7 +904,7 @@ void 
sk_stream_kill_queues(struct sock *sk);  void sk_set_memalloc(struct sock *sk);  void sk_clear_memalloc(struct sock *sk); -int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);  struct request_sock_ops;  struct timewait_sock_ops; @@ -1040,42 +1042,9 @@ struct proto {  #endif  }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { -	/* Currently active and new sockets should be assigned to cgroups */ -	MEMCG_SOCK_ACTIVE, -	/* It was ever activated; we must disarm static keys on destruction */ -	MEMCG_SOCK_ACTIVATED, -}; - -struct cg_proto { -	struct page_counter	memory_allocated;	/* Current allocated memory. */ -	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */ -	int			memory_pressure; -	long			sysctl_mem[3]; -	unsigned long		flags; -	/* -	 * memcg field is used to find which memcg we belong directly -	 * Each memcg struct can hold more than one cg_proto, so container_of -	 * won't really cut. -	 * -	 * The elegant solution would be having an inverse function to -	 * proto_cgroup in struct proto, but that means polluting the structure -	 * for everybody, instead of just for memcg users. 
-	 */ -	struct mem_cgroup	*memcg; -}; -  int proto_register(struct proto *prot, int alloc_slab);  void proto_unregister(struct proto *prot); -static inline bool memcg_proto_active(struct cg_proto *cg_proto) -{ -	return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); -} -  #ifdef SOCK_REFCNT_DEBUG  static inline void sk_refcnt_debug_inc(struct sock *sk)  { @@ -1685,6 +1654,20 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)  kuid_t sock_i_uid(struct sock *sk);  unsigned long sock_i_ino(struct sock *sk); +static inline void sk_set_txhash(struct sock *sk) +{ +	sk->sk_txhash = prandom_u32(); + +	if (unlikely(!sk->sk_txhash)) +		sk->sk_txhash = 1; +} + +static inline void sk_rethink_txhash(struct sock *sk) +{ +	if (sk->sk_txhash) +		sk_set_txhash(sk); +} +  static inline struct dst_entry *  __sk_dst_get(struct sock *sk)  { @@ -1709,6 +1692,8 @@ static inline void dst_negative_advice(struct sock *sk)  {  	struct dst_entry *ndst, *dst = __sk_dst_get(sk); +	sk_rethink_txhash(sk); +  	if (dst && dst->ops->negative_advice) {  		ndst = dst->ops->negative_advice(dst); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d5671f118bfc..319baab3b48e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj {  		struct switchdev_obj_fdb {		/* PORT_FDB */  			const unsigned char *addr;  			u16 vid; +			u16 ndm_state;  		} fdb;  	} u;  }; @@ -157,6 +158,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],  int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,  			    struct net_device *dev,  			    struct net_device *filter_dev, int idx); +void switchdev_port_fwd_mark_set(struct net_device *dev, +				 struct net_device *group_dev, +				 bool joining);  #else @@ -271,6 +275,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,  	return -EOPNOTSUPP;  } +static inline void switchdev_port_fwd_mark_set(struct net_device *dev, +					       struct 
net_device *group_dev, +					       bool joining) +{ +} +  #endif  #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index a152e9858b2c..958d69cfb19c 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -15,7 +15,7 @@  struct tcf_bpf {  	struct tcf_common	common; -	struct bpf_prog		*filter; +	struct bpf_prog __rcu	*filter;  	union {  		u32		bpf_fd;  		u16		bpf_num_ops; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 9fc9b578908a..592a6bc02b0b 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -6,9 +6,10 @@  struct tcf_gact {  	struct tcf_common	common;  #ifdef CONFIG_GACT_PROB -        u16			tcfg_ptype; -        u16			tcfg_pval; -        int			tcfg_paction; +	u16			tcfg_ptype; +	u16			tcfg_pval; +	int			tcfg_paction; +	atomic_t		packets;  #endif  };  #define to_gact(a) \ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 4dd77a1c106b..dae96bae1c19 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,7 @@ struct tcf_mirred {  	int			tcfm_eaction;  	int			tcfm_ifindex;  	int			tcfm_ok_push; -	struct net_device	*tcfm_dev; +	struct net_device __rcu	*tcfm_dev;  	struct list_head	tcfm_list;  };  #define to_mirred(a) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 950cfecaad3c..0cab28cd43a9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat;  extern int sysctl_tcp_min_tso_segs;  extern int sysctl_tcp_autocorking;  extern int sysctl_tcp_invalid_ratelimit; +extern int sysctl_tcp_pacing_ss_ratio; +extern int sysctl_tcp_pacing_ca_ratio;  extern atomic_long_t tcp_memory_allocated;  extern struct percpu_counter tcp_sockets_allocated; @@ -886,7 +888,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);  extern struct tcp_congestion_ops tcp_reno;  struct tcp_congestion_ops 
*tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(const char *name); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);  #ifdef CONFIG_INET  char *tcp_ca_get_name_by_key(u32 key, char *buffer);  #else @@ -989,6 +991,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)  #define TCP_INFINITE_SSTHRESH	0x7fffffff +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ +	return tp->snd_cwnd < tp->snd_ssthresh; +} +  static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)  {  	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; @@ -1065,7 +1072,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)  	const struct tcp_sock *tp = tcp_sk(sk);  	/* If in slow start, ensure cwnd grows to twice what was ACKed. */ -	if (tp->snd_cwnd <= tp->snd_ssthresh) +	if (tcp_in_slow_start(tp))  		return tp->snd_cwnd < 2 * tp->max_packets_out;  	return tp->is_cwnd_limited; @@ -1160,6 +1167,19 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)  }  u32 tcp_default_init_rwnd(u32 mss); +void tcp_cwnd_restart(struct sock *sk, s32 delta); + +static inline void tcp_slow_start_after_idle_check(struct sock *sk) +{ +	struct tcp_sock *tp = tcp_sk(sk); +	s32 delta; + +	if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) +		return; +	delta = tcp_time_stamp - tp->lsndtime; +	if (delta > inet_csk(sk)->icsk_rto) +		tcp_cwnd_restart(sk, delta); +}  /* Determine a window scaling and initial window to offer. 
*/  void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 68f0ecad6c6e..1a47946f95ba 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)  static inline void twsk_destructor(struct sock *sk)  { -	BUG_ON(sk == NULL); -	BUG_ON(sk->sk_prot == NULL); -	BUG_ON(sk->sk_prot->twsk_prot == NULL);  	if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)  		sk->sk_prot->twsk_prot->twsk_destructor(sk);  } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index c491c1221606..cb2f89f20f5c 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -31,7 +31,8 @@ struct udp_port_cfg {  	__be16			peer_udp_port;  	unsigned int		use_udp_checksums:1,  				use_udp6_tx_checksums:1, -				use_udp6_rx_checksums:1; +				use_udp6_rx_checksums:1, +				ipv6_v6only:1;  };  int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, @@ -93,6 +94,10 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,  void udp_tunnel_sock_release(struct socket *sock); +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, +				    __be16 flags, __be64 tunnel_id, +				    int md_size); +  static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,  							 bool udp_csum)  { diff --git a/include/net/vrf.h b/include/net/vrf.h new file mode 100644 index 000000000000..593e6094ddd4 --- /dev/null +++ b/include/net/vrf.h @@ -0,0 +1,178 @@ +/* + * include/net/net_vrf.h - adds vrf dev structure definitions + * Copyright (c) 2015 Cumulus Networks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_NET_VRF_H +#define __LINUX_NET_VRF_H + +struct net_vrf_dev { +	struct rcu_head		rcu; +	int                     ifindex; /* ifindex of master dev */ +	u32                     tb_id;   /* table id for VRF */ +}; + +struct slave { +	struct list_head	list; +	struct net_device	*dev; +}; + +struct slave_queue { +	struct list_head	all_slaves; +}; + +struct net_vrf { +	struct slave_queue	queue; +	struct rtable           *rth; +	u32			tb_id; +}; + + +#if IS_ENABLED(CONFIG_NET_VRF) +/* called with rcu_read_lock() */ +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ +	struct net_vrf_dev *vrf_ptr; +	int ifindex = 0; + +	if (!dev) +		return 0; + +	if (netif_is_vrf(dev)) { +		ifindex = dev->ifindex; +	} else { +		vrf_ptr = rcu_dereference(dev->vrf_ptr); +		if (vrf_ptr) +			ifindex = vrf_ptr->ifindex; +	} + +	return ifindex; +} + +static inline int vrf_master_ifindex(const struct net_device *dev) +{ +	int ifindex; + +	rcu_read_lock(); +	ifindex = vrf_master_ifindex_rcu(dev); +	rcu_read_unlock(); + +	return ifindex; +} + +/* called with rcu_read_lock */ +static inline u32 vrf_dev_table_rcu(const struct net_device *dev) +{ +	u32 tb_id = 0; + +	if (dev) { +		struct net_vrf_dev *vrf_ptr; + +		vrf_ptr = rcu_dereference(dev->vrf_ptr); +		if (vrf_ptr) +			tb_id = vrf_ptr->tb_id; +	} +	return tb_id; +} + +static inline u32 vrf_dev_table(const struct net_device *dev) +{ +	u32 tb_id; + +	rcu_read_lock(); +	tb_id = vrf_dev_table_rcu(dev); +	rcu_read_unlock(); + +	return tb_id; +} + +static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) +{ +	struct net_device *dev; +	u32 tb_id = 0; + +	if (!ifindex) +		return 0; + +	rcu_read_lock(); + +	dev = dev_get_by_index_rcu(net, ifindex); +	if (dev) +		tb_id = vrf_dev_table_rcu(dev); + +	rcu_read_unlock(); + +	return tb_id; +} + +/* called with rtnl */ +static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) +{ +	u32 tb_id = 0; + +	if (dev) { +		struct net_vrf_dev *vrf_ptr; + +	
	vrf_ptr = rtnl_dereference(dev->vrf_ptr); +		if (vrf_ptr) +			tb_id = vrf_ptr->tb_id; +	} +	return tb_id; +} + +/* caller has already checked netif_is_vrf(dev) */ +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ +	struct rtable *rth = ERR_PTR(-ENETUNREACH); +	struct net_vrf *vrf = netdev_priv(dev); + +	if (vrf) { +		rth = vrf->rth; +		atomic_inc(&rth->dst.__refcnt); +	} +	return rth; +} + +#else +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ +	return 0; +} + +static inline int vrf_master_ifindex(const struct net_device *dev) +{ +	return 0; +} + +static inline u32 vrf_dev_table_rcu(const struct net_device *dev) +{ +	return 0; +} + +static inline u32 vrf_dev_table(const struct net_device *dev) +{ +	return 0; +} + +static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) +{ +	return 0; +} + +static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) +{ +	return 0; +} + +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ +	return ERR_PTR(-ENETUNREACH); +} +#endif + +#endif /* __LINUX_NET_VRF_H */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0082b5d33d7d..480a319b4c92 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -7,6 +7,7 @@  #include <linux/skbuff.h>  #include <linux/netdevice.h>  #include <linux/udp.h> +#include <net/dst_metadata.h>  #define VNI_HASH_BITS	10  #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS) @@ -94,20 +95,18 @@ struct vxlanhdr {  #define VXLAN_VNI_MASK  (VXLAN_VID_MASK << 8)  #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +#define VNI_HASH_BITS	10 +#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS) +#define FDB_HASH_BITS	8 +#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS) +  struct vxlan_metadata { -	__be32		vni;  	u32		gbp;  }; -struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, -			   struct vxlan_metadata *md); -  /* per UDP socket information */  struct vxlan_sock {  	struct 
hlist_node hlist; -	vxlan_rcv_t	 *rcv; -	void		 *data;  	struct work_struct del_work;  	struct socket	 *sock;  	struct rcu_head	  rcu; @@ -117,6 +116,58 @@ struct vxlan_sock {  	u32		  flags;  }; +union vxlan_addr { +	struct sockaddr_in sin; +	struct sockaddr_in6 sin6; +	struct sockaddr sa; +}; + +struct vxlan_rdst { +	union vxlan_addr	 remote_ip; +	__be16			 remote_port; +	u32			 remote_vni; +	u32			 remote_ifindex; +	struct list_head	 list; +	struct rcu_head		 rcu; +}; + +struct vxlan_config { +	union vxlan_addr	remote_ip; +	union vxlan_addr	saddr; +	u32			vni; +	int			remote_ifindex; +	int			mtu; +	__be16			dst_port; +	__u16			port_min; +	__u16			port_max; +	__u8			tos; +	__u8			ttl; +	u32			flags; +	unsigned long		age_interval; +	unsigned int		addrmax; +	bool			no_share; +}; + +/* Pseudo network device */ +struct vxlan_dev { +	struct hlist_node hlist;	/* vni hash table */ +	struct list_head  next;		/* vxlan's per namespace list */ +	struct vxlan_sock *vn_sock;	/* listening socket */ +	struct net_device *dev; +	struct net	  *net;		/* netns for packet i/o */ +	struct vxlan_rdst default_dst;	/* default destination */ +	u32		  flags;	/* VXLAN_F_* in vxlan.h */ + +	struct timer_list age_timer; +	spinlock_t	  hash_lock; +	unsigned int	  addrcnt; +	struct gro_cells  gro_cells; + +	struct vxlan_config	cfg; + +	struct hlist_head fdb_head[FDB_HASH_SIZE]; +}; +  #define VXLAN_F_LEARN			0x01  #define VXLAN_F_PROXY			0x02  #define VXLAN_F_RSC			0x04 @@ -130,6 +181,7 @@ struct vxlan_sock {  #define VXLAN_F_REMCSUM_RX		0x400  #define VXLAN_F_GBP			0x800  #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000 +#define VXLAN_F_COLLECT_METADATA	0x2000  /* Flags that are used in the receive path. 
These flags must match in   * order for a socket to be shareable @@ -137,18 +189,16 @@ struct vxlan_sock {  #define VXLAN_F_RCV_FLAGS		(VXLAN_F_GBP |			\  					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\  					 VXLAN_F_REMCSUM_RX |		\ -					 VXLAN_F_REMCSUM_NOPARTIAL) - -struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, -				  vxlan_rcv_t *rcv, void *data, -				  bool no_share, u32 flags); +					 VXLAN_F_REMCSUM_NOPARTIAL |	\ +					 VXLAN_F_COLLECT_METADATA) -void vxlan_sock_release(struct vxlan_sock *vs); +struct net_device *vxlan_dev_create(struct net *net, const char *name, +				    u8 name_assign_type, struct vxlan_config *conf); -int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, -		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, -		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, -		   bool xnet, u32 vxflags); +static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan) +{ +	return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport; +}  static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,  						     netdev_features_t features) @@ -191,4 +241,10 @@ static inline void vxlan_get_rx_port(struct net_device *netdev)  {  }  #endif + +static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) +{ +	return vs->sock->sk->sk_family; +} +  #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0ee97eec24d..312e3fee9ccf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,10 +285,13 @@ struct xfrm_policy_afinfo {  	unsigned short		family;  	struct dst_ops		*dst_ops;  	void			(*garbage_collect)(struct net *net); -	struct dst_entry	*(*dst_lookup)(struct net *net, int tos, +	struct dst_entry	*(*dst_lookup)(struct net *net, +					       int tos, int oif,  					       const xfrm_address_t *saddr,  					       const xfrm_address_t *daddr); -	int			(*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); +	int			(*get_saddr)(struct net *net, int oif, +	
				     xfrm_address_t *saddr, +					     xfrm_address_t *daddr);  	void			(*decode_session)(struct sk_buff *skb,  						  struct flowi *fl,  						  int reverse);  |