diff options
Diffstat (limited to 'net/openvswitch')
| -rw-r--r-- | net/openvswitch/actions.c | 79 | ||||
| -rw-r--r-- | net/openvswitch/conntrack.c | 2 | ||||
| -rw-r--r-- | net/openvswitch/datapath.c | 25 | ||||
| -rw-r--r-- | net/openvswitch/flow.c | 118 | ||||
| -rw-r--r-- | net/openvswitch/flow.h | 12 | ||||
| -rw-r--r-- | net/openvswitch/flow_netlink.c | 316 | ||||
| -rw-r--r-- | net/openvswitch/flow_netlink.h | 3 | ||||
| -rw-r--r-- | net/openvswitch/flow_table.c | 25 | ||||
| -rw-r--r-- | net/openvswitch/vport-internal_dev.c | 2 | ||||
| -rw-r--r-- | net/openvswitch/vport.c | 8 | 
10 files changed, 390 insertions, 200 deletions
| diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1ecbd7715f6d..4e03f64709bc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -71,6 +71,8 @@ struct ovs_frag_data {  static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);  #define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)  struct action_fifo {  	int head;  	int tail; @@ -78,7 +80,12 @@ struct action_fifo {  	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];  }; +struct recirc_keys { +	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; +  static struct action_fifo __percpu *action_fifos; +static struct recirc_keys __percpu *recirc_keys;  static DEFINE_PER_CPU(int, exec_actions_level);  static void action_fifo_init(struct action_fifo *fifo) @@ -153,7 +160,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,  static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,  		     const struct ovs_action_push_mpls *mpls)  { -	__be32 *new_mpls_lse; +	struct mpls_shim_hdr *new_mpls_lse;  	/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */  	if (skb->encapsulation) @@ -162,19 +169,23 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,  	if (skb_cow_head(skb, MPLS_HLEN) < 0)  		return -ENOMEM; +	if (!skb->inner_protocol) { +		skb_set_inner_network_header(skb, skb->mac_len); +		skb_set_inner_protocol(skb, skb->protocol); +	} +  	skb_push(skb, MPLS_HLEN);  	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),  		skb->mac_len);  	skb_reset_mac_header(skb); +	skb_set_network_header(skb, skb->mac_len); -	new_mpls_lse = (__be32 *)skb_mpls_header(skb); -	*new_mpls_lse = mpls->mpls_lse; +	new_mpls_lse = mpls_hdr(skb); +	new_mpls_lse->label_stack_entry = mpls->mpls_lse;  	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);  	update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); -	if (!skb->inner_protocol) -		skb_set_inner_protocol(skb, skb->protocol);  	skb->protocol = mpls->mpls_ethertype;  	invalidate_flow_key(key); @@ -191,18 +202,19 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,  	if (unlikely(err))  		return err; -	skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); +	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);  	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),  		skb->mac_len);  	__skb_pull(skb, MPLS_HLEN);  	skb_reset_mac_header(skb); +	skb_set_network_header(skb, skb->mac_len); -	/* skb_mpls_header() is used to locate the ethertype -	 * field correctly in the presence of VLAN tags. +	/* mpls_hdr() is used to locate the ethertype field correctly in the +	 * presence of VLAN tags.  	 */ -	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); +	hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);  	update_ethertype(skb, hdr, ethertype);  	if (eth_p_mpls(skb->protocol))  		skb->protocol = ethertype; @@ -214,7 +226,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,  static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,  		    const __be32 *mpls_lse, const __be32 *mask)  { -	__be32 *stack; +	struct mpls_shim_hdr *stack;  	__be32 lse;  	int err; @@ -222,16 +234,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,  	if (unlikely(err))  		return err; -	stack = (__be32 *)skb_mpls_header(skb); -	lse = OVS_MASKED(*stack, *mpls_lse, *mask); +	stack = mpls_hdr(skb); +	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);  	if (skb->ip_summed == CHECKSUM_COMPLETE) { -		__be32 diff[] = { ~(*stack), lse }; +		__be32 diff[] = { ~(stack->label_stack_entry), lse };  		skb->csum = ~csum_partial((char *)diff, sizeof(diff),  					  ~skb->csum);  	} -	*stack = lse; +	stack->label_stack_entry = lse;  	flow_key->mpls.top_lse = lse;  	return 0;  } @@ -241,20 +253,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)  	int err;  	err = skb_vlan_pop(skb); -	if (skb_vlan_tag_present(skb)) +	if (skb_vlan_tag_present(skb)) {  		invalidate_flow_key(key); -	else -		key->eth.tci = 0; +	} else { +		key->eth.vlan.tci = 0; +		key->eth.vlan.tpid = 0; +	}  	return err;  }  static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,  		     const struct ovs_action_push_vlan *vlan)  { -	if (skb_vlan_tag_present(skb)) +	if (skb_vlan_tag_present(skb)) {  		invalidate_flow_key(key); -	else -		key->eth.tci = vlan->vlan_tci; +	} else { +		key->eth.vlan.tci = vlan->vlan_tci; +		key->eth.vlan.tpid = vlan->vlan_tpid; +	}  	return skb_vlan_push(skb, vlan->vlan_tpid,  			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);  } @@ -1011,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,  			  const struct nlattr *a, int rem)  {  	struct deferred_action *da; +	int level;  	if (!is_flow_key_valid(key)) {  		int err; @@ -1034,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,  			return 0;  	} +	level = this_cpu_read(exec_actions_level); +	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { +		struct recirc_keys *rks = this_cpu_ptr(recirc_keys); +		struct sw_flow_key *recirc_key = &rks->key[level - 1]; + +		*recirc_key = *key; +		recirc_key->recirc_id = nla_get_u32(a); +		ovs_dp_process_packet(skb, recirc_key); + +		return 0; +	} +  	da = add_deferred_actions(skb, key, NULL);  	if (da) {  		da->pkt_key.recirc_id = nla_get_u32(a); @@ -1200,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,  			const struct sw_flow_actions *acts,  			struct sw_flow_key *key)  { -	static const int ovs_recursion_limit = 5;  	int err, level;  	level = __this_cpu_inc_return(exec_actions_level); -	if (unlikely(level > ovs_recursion_limit)) { +	if (unlikely(level > OVS_RECURSION_LIMIT)) {  		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",  				     ovs_dp_name(dp));  		kfree_skb(skb); @@ -1229,10 +1257,17 @@ int action_fifos_init(void)  	if (!action_fifos)  		return -ENOMEM; +	recirc_keys = alloc_percpu(struct recirc_keys); +	if (!recirc_keys) { +		free_percpu(action_fifos); +		return -ENOMEM; +	} +  	return 0;  }  void action_fifos_exit(void)  {  	free_percpu(action_fifos); +	free_percpu(recirc_keys);  } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e054a748ff25..31045ef44a82 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)  	if (ct_info->helper)  		module_put(ct_info->helper->me);  	if (ct_info->ct) -		nf_ct_put(ct_info->ct); +		nf_ct_tmpl_free(ct_info->ct);  }  void ovs_ct_init(struct net *net) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 524c0fd3078e..4d67ea856067 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -928,7 +928,6 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)  	struct sw_flow_mask mask;  	struct sk_buff *reply;  	struct datapath *dp; -	struct sw_flow_key key;  	struct sw_flow_actions *acts;  	struct sw_flow_match match;  	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -956,20 +955,24 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)  	}  	/* Extract key. */ -	ovs_match_init(&match, &key, &mask); +	ovs_match_init(&match, &new_flow->key, false, &mask);  	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],  				  a[OVS_FLOW_ATTR_MASK], log);  	if (error)  		goto err_kfree_flow; -	ovs_flow_mask_key(&new_flow->key, &key, true, &mask); -  	/* Extract flow identifier. */  	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], -				       &key, log); +				       &new_flow->key, log);  	if (error)  		goto err_kfree_flow; +	/* unmasked key is needed to match when ufid is not used. */ +	if (ovs_identifier_is_key(&new_flow->id)) +		match.key = new_flow->id.unmasked_key; + +	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); +  	/* Validate actions. */  	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],  				     &new_flow->key, &acts, log); @@ -996,7 +999,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)  	if (ovs_identifier_is_ufid(&new_flow->id))  		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);  	if (!flow) -		flow = ovs_flow_tbl_lookup(&dp->table, &key); +		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);  	if (likely(!flow)) {  		rcu_assign_pointer(new_flow->sf_acts, acts); @@ -1121,7 +1124,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)  	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);  	if (a[OVS_FLOW_ATTR_KEY]) { -		ovs_match_init(&match, &key, &mask); +		ovs_match_init(&match, &key, true, &mask);  		error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],  					  a[OVS_FLOW_ATTR_MASK], log);  	} else if (!ufid_present) { @@ -1238,7 +1241,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)  	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);  	if (a[OVS_FLOW_ATTR_KEY]) { -		ovs_match_init(&match, &key, NULL); +		ovs_match_init(&match, &key, true, NULL);  		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,  					log);  	} else if (!ufid_present) { @@ -1297,7 +1300,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)  	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);  	if (a[OVS_FLOW_ATTR_KEY]) { -		ovs_match_init(&match, &key, NULL); +		ovs_match_init(&match, &key, true, NULL);  		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],  					NULL, log);  		if (unlikely(err)) @@ -2437,3 +2440,7 @@ module_exit(dp_cleanup);  MODULE_DESCRIPTION("Open vSwitch switching datapath");  MODULE_LICENSE("GPL"); +MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128eeeab2..22087062bd10 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@  #include <linux/module.h>  #include <linux/in.h>  #include <linux/rcupdate.h> +#include <linux/cpumask.h>  #include <linux/if_arp.h>  #include <linux/ip.h>  #include <linux/ipv6.h> @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,  {  	struct flow_stats *stats;  	int node = numa_node_id(); +	int cpu = smp_processor_id();  	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); -	stats = rcu_dereference(flow->stats[node]); +	stats = rcu_dereference(flow->stats[cpu]); -	/* Check if already have node-specific stats. */ +	/* Check if already have CPU-specific stats. */  	if (likely(stats)) {  		spin_lock(&stats->lock);  		/* Mark if we write on the pre-allocated stats. */ -		if (node == 0 && unlikely(flow->stats_last_writer != node)) -			flow->stats_last_writer = node; +		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) +			flow->stats_last_writer = cpu;  	} else {  		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */  		spin_lock(&stats->lock); -		/* If the current NUMA-node is the only writer on the +		/* If the current CPU is the only writer on the  		 * pre-allocated stats keep using them.  		 */ -		if (unlikely(flow->stats_last_writer != node)) { +		if (unlikely(flow->stats_last_writer != cpu)) {  			/* A previous locker may have already allocated the -			 * stats, so we need to check again.  If node-specific +			 * stats, so we need to check again.  If CPU-specific  			 * stats were already allocated, we update the pre-  			 * allocated stats as we have already locked them.  			 */ -			if (likely(flow->stats_last_writer != NUMA_NO_NODE) -			    && likely(!rcu_access_pointer(flow->stats[node]))) { -				/* Try to allocate node-specific stats. */ +			if (likely(flow->stats_last_writer != -1) && +			    likely(!rcu_access_pointer(flow->stats[cpu]))) { +				/* Try to allocate CPU-specific stats. */  				struct flow_stats *new_stats;  				new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,  					new_stats->tcp_flags = tcp_flags;  					spin_lock_init(&new_stats->lock); -					rcu_assign_pointer(flow->stats[node], +					rcu_assign_pointer(flow->stats[cpu],  							   new_stats);  					goto unlock;  				}  			} -			flow->stats_last_writer = node; +			flow->stats_last_writer = cpu;  		}  	} @@ -136,14 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow,  			struct ovs_flow_stats *ovs_stats,  			unsigned long *used, __be16 *tcp_flags)  { -	int node; +	int cpu;  	*used = 0;  	*tcp_flags = 0;  	memset(ovs_stats, 0, sizeof(*ovs_stats)); -	for_each_node(node) { -		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); +	/* We open code this to make sure cpu 0 is always considered */ +	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { +		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);  		if (stats) {  			/* Local CPU may write on non-local stats, so we must @@ -163,10 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow,  /* Called with ovs_mutex. */  void ovs_flow_stats_clear(struct sw_flow *flow)  { -	int node; +	int cpu; -	for_each_node(node) { -		struct flow_stats *stats = ovsl_dereference(flow->stats[node]); +	/* We open code this to make sure cpu 0 is always considered */ +	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { +		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);  		if (stats) {  			spin_lock_bh(&stats->lock); @@ -302,24 +306,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb)  				  sizeof(struct icmp6hdr));  } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. + * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)  { -	struct qtag_prefix { -		__be16 eth_type; /* ETH_P_8021Q */ -		__be16 tci; -	}; -	struct qtag_prefix *qp; +	struct vlan_head *vh = (struct vlan_head *)skb->data; + +	if (likely(!eth_type_vlan(vh->tpid))) +		return 0; -	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) +	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))  		return 0; -	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + -					 sizeof(__be16)))) +	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + +				 sizeof(__be16))))  		return -ENOMEM; -	qp = (struct qtag_prefix *) skb->data; -	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); -	__skb_pull(skb, sizeof(struct qtag_prefix)); +	vh = (struct vlan_head *)skb->data; +	key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); +	key_vh->tpid = vh->tpid; + +	__skb_pull(skb, sizeof(struct vlan_head)); +	return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ +	int res; + +	key->eth.vlan.tci = 0; +	key->eth.vlan.tpid = 0; +	key->eth.cvlan.tci = 0; +	key->eth.cvlan.tpid = 0; + +	if (skb_vlan_tag_present(skb)) { +		key->eth.vlan.tci = htons(skb->vlan_tci); +		key->eth.vlan.tpid = skb->vlan_proto; +	} else { +		/* Parse outer vlan tag in the non-accelerated case. */ +		res = parse_vlan_tag(skb, &key->eth.vlan); +		if (res <= 0) +			return res; +	} + +	/* Parse inner vlan tag. */ +	res = parse_vlan_tag(skb, &key->eth.cvlan); +	if (res <= 0) +		return res;  	return 0;  } @@ -480,12 +517,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  	 * update skb->csum here.  	 */ -	key->eth.tci = 0; -	if (skb_vlan_tag_present(skb)) -		key->eth.tci = htons(skb->vlan_tci); -	else if (eth->h_proto == htons(ETH_P_8021Q)) -		if (unlikely(parse_vlan(skb, key))) -			return -ENOMEM; +	if (unlikely(parse_vlan(skb, key))) +		return -ENOMEM;  	key->eth.type = parse_ethertype(skb);  	if (unlikely(key->eth.type == htons(0))) @@ -600,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  	} else if (eth_p_mpls(key->eth.type)) {  		size_t stack_len = MPLS_HLEN; -		/* In the presence of an MPLS label stack the end of the L2 -		 * header and the beginning of the L3 header differ. -		 * -		 * Advance network_header to the beginning of the L3 -		 * header. mac_len corresponds to the end of the L2 header. -		 */ +		skb_set_inner_network_header(skb, skb->mac_len);  		while (1) {  			__be32 lse; @@ -613,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  			if (unlikely(error))  				return 0; -			memcpy(&lse, skb_network_header(skb), MPLS_HLEN); +			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);  			if (stack_len == MPLS_HLEN)  				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); -			skb_set_network_header(skb, skb->mac_len + stack_len); +			skb_set_inner_network_header(skb, skb->mac_len + stack_len);  			if (lse & htonl(MPLS_LS_S_MASK))  				break; @@ -734,8 +762,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,  {  	int err; -	memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); -  	/* Extract metadata from netlink attributes. */  	err = ovs_nla_get_flow_metadata(net, attr, key, log);  	if (err) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 03378e75a67c..ae783f5c6695 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -50,6 +50,11 @@ struct ovs_tunnel_info {  	struct metadata_dst	*tun_dst;  }; +struct vlan_head { +	__be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/ +	__be16 tci;  /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +}; +  #define OVS_SW_FLOW_KEY_METADATA_SIZE			\  	(offsetof(struct sw_flow_key, recirc_id) +	\  	FIELD_SIZEOF(struct sw_flow_key, recirc_id)) @@ -69,7 +74,8 @@ struct sw_flow_key {  	struct {  		u8     src[ETH_ALEN];	/* Ethernet source address. */  		u8     dst[ETH_ALEN];	/* Ethernet destination address. */ -		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +		struct vlan_head vlan; +		struct vlan_head cvlan;  		__be16 type;		/* Ethernet frame type. */  	} eth;  	union { @@ -172,14 +178,14 @@ struct sw_flow {  		struct hlist_node node[2];  		u32 hash;  	} flow_table, ufid_table; -	int stats_last_writer;		/* NUMA-node id of the last writer on +	int stats_last_writer;		/* CPU id of the last writer on  					 * 'stats[0]'.  					 */  	struct sw_flow_key key;  	struct sw_flow_id id;  	struct sw_flow_mask *mask;  	struct sw_flow_actions __rcu *sf_acts; -	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one +	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one  					   * is allocated at flow creation time,  					   * the rest are allocated on demand  					   * while holding the 'stats[0].lock'. diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c78a6a1476fb..ae25ded82b3b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb,  				  ip_tunnel_info_af(tun_info));  } +static int encode_vlan_from_nlattrs(struct sw_flow_match *match, +				    const struct nlattr *a[], +				    bool is_mask, bool inner) +{ +	__be16 tci = 0; +	__be16 tpid = 0; + +	if (a[OVS_KEY_ATTR_VLAN]) +		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + +	if (a[OVS_KEY_ATTR_ETHERTYPE]) +		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + +	if (likely(!inner)) { +		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); +		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); +	} else { +		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); +		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); +	} +	return 0; +} + +static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, +				      u64 key_attrs, bool inner, +				      const struct nlattr **a, bool log) +{ +	__be16 tci = 0; + +	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && +	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && +	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { +		/* Not a VLAN. */ +		return 0; +	} + +	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && +	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { +		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); +		return -EINVAL; +	} + +	if (a[OVS_KEY_ATTR_VLAN]) +		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + +	if (!(tci & htons(VLAN_TAG_PRESENT))) { +		if (tci) { +			OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", +				  (inner) ? "C-VLAN" : "VLAN"); +			return -EINVAL; +		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { +			/* Corner case for truncated VLAN header. */ +			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", +				  (inner) ? "C-VLAN" : "VLAN"); +			return -EINVAL; +		} +	} + +	return 1; +} + +static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, +					   u64 key_attrs, bool inner, +					   const struct nlattr **a, bool log) +{ +	__be16 tci = 0; +	__be16 tpid = 0; +	bool encap_valid = !!(match->key->eth.vlan.tci & +			      htons(VLAN_TAG_PRESENT)); +	bool i_encap_valid = !!(match->key->eth.cvlan.tci & +				htons(VLAN_TAG_PRESENT)); + +	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { +		/* Not a VLAN. */ +		return 0; +	} + +	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { +		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", +			  (inner) ? "C-VLAN" : "VLAN"); +		return -EINVAL; +	} + +	if (a[OVS_KEY_ATTR_VLAN]) +		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + +	if (a[OVS_KEY_ATTR_ETHERTYPE]) +		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + +	if (tpid != htons(0xffff)) { +		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", +			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); +		return -EINVAL; +	} +	if (!(tci & htons(VLAN_TAG_PRESENT))) { +		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", +			  (inner) ? "C-VLAN" : "VLAN"); +		return -EINVAL; +	} + +	return 1; +} + +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, +				     u64 *key_attrs, bool inner, +				     const struct nlattr **a, bool is_mask, +				     bool log) +{ +	int err; +	const struct nlattr *encap; + +	if (!is_mask) +		err = validate_vlan_from_nlattrs(match, *key_attrs, inner, +						 a, log); +	else +		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, +						      a, log); +	if (err <= 0) +		return err; + +	err = encode_vlan_from_nlattrs(match, a, is_mask, inner); +	if (err) +		return err; + +	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); +	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); +	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + +	encap = a[OVS_KEY_ATTR_ENCAP]; + +	if (!is_mask) +		err = parse_flow_nlattrs(encap, a, key_attrs, log); +	else +		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); + +	return err; +} + +static int parse_vlan_from_nlattrs(struct sw_flow_match *match, +				   u64 *key_attrs, const struct nlattr **a, +				   bool is_mask, bool log) +{ +	int err; +	bool encap_valid = false; + +	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, +					is_mask, log); +	if (err) +		return err; + +	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); +	if (encap_valid) { +		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, +						is_mask, log); +		if (err) +			return err; +	} + +	return 0; +} +  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,  				 u64 *attrs, const struct nlattr **a,  				 bool is_mask, bool log) @@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,  	}  	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { -		__be16 tci; - -		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); -		if (!(tci & htons(VLAN_TAG_PRESENT))) { -			if (is_mask) -				OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); -			else -				OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); - -			return -EINVAL; -		} - -		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); -		attrs &= ~(1 << OVS_KEY_ATTR_VLAN); +		/* VLAN attribute is always parsed before getting here since it +		 * may occur multiple times. +		 */ +		OVS_NLERR(log, "VLAN attribute unexpected."); +		return -EINVAL;  	}  	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { @@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,  		      bool log)  {  	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; -	const struct nlattr *encap;  	struct nlattr *newmask = NULL;  	u64 key_attrs = 0;  	u64 mask_attrs = 0; -	bool encap_valid = false;  	int err;  	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);  	if (err)  		return err; -	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && -	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && -	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { -		__be16 tci; - -		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && -		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { -			OVS_NLERR(log, "Invalid Vlan frame."); -			return -EINVAL; -		} - -		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); -		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); -		encap = a[OVS_KEY_ATTR_ENCAP]; -		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); -		encap_valid = true; - -		if (tci & htons(VLAN_TAG_PRESENT)) { -			err = parse_flow_nlattrs(encap, a, &key_attrs, log); -			if (err) -				return err; -		} else if (!tci) { -			/* Corner case for truncated 802.1Q header. */ -			if (nla_len(encap)) { -				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); -				return -EINVAL; -			} -		} else { -			OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); -			return  -EINVAL; -		} -	} +	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); +	if (err) +		return err;  	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);  	if (err) @@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,  			goto free_newmask;  		/* Always match on tci. */ -		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); - -		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { -			__be16 eth_type = 0; -			__be16 tci = 0; - -			if (!encap_valid) { -				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); -				err = -EINVAL; -				goto free_newmask; -			} - -			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); -			if (a[OVS_KEY_ATTR_ETHERTYPE]) -				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - -			if (eth_type == htons(0xffff)) { -				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); -				encap = a[OVS_KEY_ATTR_ENCAP]; -				err = parse_flow_mask_nlattrs(encap, a, -							      &mask_attrs, log); -				if (err) -					goto free_newmask; -			} else { -				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", -					  ntohs(eth_type)); -				err = -EINVAL; -				goto free_newmask; -			} - -			if (a[OVS_KEY_ATTR_VLAN]) -				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); +		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); +		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); -			if (!(tci & htons(VLAN_TAG_PRESENT))) { -				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", -					  ntohs(tci)); -				err = -EINVAL; -				goto free_newmask; -			} -		} +		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); +		if (err) +			goto free_newmask;  		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,  					   log); @@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,  	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);  } +static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, +			    bool is_mask) +{ +	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); + +	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || +	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) +		return -EMSGSIZE; +	return 0; +} +  static int __ovs_nla_put_key(const struct sw_flow_key *swkey,  			     const struct sw_flow_key *output, bool is_mask,  			     struct sk_buff *skb)  {  	struct ovs_key_ethernet *eth_key; -	struct nlattr *nla, *encap; +	struct nlattr *nla; +	struct nlattr *encap = NULL; +	struct nlattr *in_encap = NULL;  	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))  		goto nla_put_failure; @@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,  	ether_addr_copy(eth_key->eth_src, output->eth.src);  	ether_addr_copy(eth_key->eth_dst, output->eth.dst); -	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { -		__be16 eth_type; -		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); -		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || -		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) +	if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { +		if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))  			goto nla_put_failure;  		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); -		if (!swkey->eth.tci) +		if (!swkey->eth.vlan.tci)  			goto unencap; -	} else -		encap = NULL; + +		if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { +			if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) +				goto nla_put_failure; +			in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); +			if (!swkey->eth.cvlan.tci) +				goto unencap; +		} +	}  	if (swkey->eth.type == htons(ETH_P_802_2)) {  		/* @@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,  	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))  		goto nla_put_failure; +	if (eth_type_vlan(swkey->eth.type)) { +		/* There are 3 VLAN tags, we don't know anything about the rest +		 * of the packet, so truncate here. +		 */ +		WARN_ON_ONCE(!(encap && in_encap)); +		goto unencap; +	} +  	if (swkey->eth.type == htons(ETH_P_IP)) {  		struct ovs_key_ipv4 *ipv4_key; @@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,  	}  unencap: +	if (in_encap) +		nla_nest_end(skb, in_encap);  	if (encap)  		nla_nest_end(skb, encap); @@ -1882,13 +1996,15 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,  void ovs_match_init(struct sw_flow_match *match,  		    struct sw_flow_key *key, +		    bool reset_key,  		    struct sw_flow_mask *mask)  {  	memset(match, 0, sizeof(*match));  	match->key = key;  	match->mask = mask; -	memset(key, 0, sizeof(*key)); +	if (reset_key) +		memset(key, 0, sizeof(*key));  	if (mask) {  		memset(&mask->key, 0, sizeof(mask->key)); @@ -1935,7 +2051,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,  	struct nlattr *a;  	int err = 0, start, opts_type; -	ovs_match_init(&match, &key, NULL); +	ovs_match_init(&match, &key, true, NULL);  	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);  	if (opts_type < 0)  		return opts_type; @@ -2283,7 +2399,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,  		case OVS_ACTION_ATTR_PUSH_VLAN:  			vlan = nla_data(a); -			if (vlan->vlan_tpid != htons(ETH_P_8021Q)) +			if (!eth_type_vlan(vlan->vlan_tpid))  				return -EINVAL;  			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))  				return -EINVAL; @@ -2388,7 +2504,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,  	(*sfa)->orig_len = nla_len(attr);  	err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, -				     key->eth.tci, log); +				     key->eth.vlan.tci, log);  	if (err)  		ovs_nla_free_flow_actions(*sfa); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 47dd142eca1c..45f9769e5aac 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -41,7 +41,8 @@ size_t ovs_tun_key_attr_size(void);  size_t ovs_key_attr_size(void);  void ovs_match_init(struct sw_flow_match *match, -		    struct sw_flow_key *key, struct sw_flow_mask *mask); +		    struct sw_flow_key *key, bool reset_key, +		    struct sw_flow_mask *mask);  int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,  		    int attr, bool is_mask, struct sk_buff *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d073fff82fdb..ea7a8073fa02 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -32,6 +32,7 @@  #include <linux/module.h>  #include <linux/in.h>  #include <linux/rcupdate.h> +#include <linux/cpumask.h>  #include <linux/if_arp.h>  #include <linux/ip.h>  #include <linux/ipv6.h> @@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void)  {  	struct sw_flow *flow;  	struct flow_stats *stats; -	int node; -	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); +	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);  	if (!flow)  		return ERR_PTR(-ENOMEM); -	flow->sf_acts = NULL; -	flow->mask = NULL; -	flow->id.unmasked_key = NULL; -	flow->id.ufid_len = 0; -	flow->stats_last_writer = NUMA_NO_NODE; +	flow->stats_last_writer = -1;  	/* Initialize the default stat node. */  	stats = kmem_cache_alloc_node(flow_stats_cache, @@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void)  	RCU_INIT_POINTER(flow->stats[0], stats); -	for_each_node(node) -		if (node != 0) -			RCU_INIT_POINTER(flow->stats[node], NULL); -  	return flow;  err:  	kmem_cache_free(flow_cache, flow); @@ -142,16 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)  static void flow_free(struct sw_flow *flow)  { -	int node; +	int cpu;  	if (ovs_identifier_is_key(&flow->id))  		kfree(flow->id.unmasked_key);  	if (flow->sf_acts)  		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); -	for_each_node(node) -		if (flow->stats[node]) +	/* We open code this to make sure cpu 0 is always considered */ +	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) +		if (flow->stats[cpu])  			kmem_cache_free(flow_stats_cache, -					(struct flow_stats __force *)flow->stats[node]); +					(struct flow_stats __force *)flow->stats[cpu]);  	kmem_cache_free(flow_cache, flow);  } @@ -756,7 +749,7 @@ int ovs_flow_init(void)  	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));  	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) -				       + (nr_node_ids +				       + (nr_cpu_ids  					  * sizeof(struct flow_stats *)),  				       0, 0, NULL);  	if (flow_cache == NULL) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 95c36147a6e1..e7da29021b38 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -176,7 +176,7 @@ static void do_setup(struct net_device *netdev)  	netdev->vlan_features = netdev->features;  	netdev->hw_enc_features = netdev->features; -	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; +	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;  	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;  	eth_hw_addr_random(netdev); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6b21fd068d87..7387418ac514 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -485,9 +485,15 @@ static unsigned int packet_length(const struct sk_buff *skb)  {  	unsigned int length = skb->len - ETH_HLEN; -	if (skb->protocol == htons(ETH_P_8021Q)) +	if (!skb_vlan_tag_present(skb) && +	    eth_type_vlan(skb->protocol))  		length -= VLAN_HLEN; +	/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow +	 * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none +	 * account for 802.1ad. e.g. is_skb_forwardable(). +	 */ +  	return length;  } |