diff options
Diffstat (limited to 'net/openvswitch/flow.c')
| -rw-r--r-- | net/openvswitch/flow.c | 118 | 
1 file changed, 72 insertions, 46 deletions
| diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128eeeab2..c8c82e109c68 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@  #include <linux/module.h>  #include <linux/in.h>  #include <linux/rcupdate.h> +#include <linux/cpumask.h>  #include <linux/if_arp.h>  #include <linux/ip.h>  #include <linux/ipv6.h> @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,  {  	struct flow_stats *stats;  	int node = numa_node_id(); +	int cpu = smp_processor_id();  	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); -	stats = rcu_dereference(flow->stats[node]); +	stats = rcu_dereference(flow->stats[cpu]); -	/* Check if already have node-specific stats. */ +	/* Check if already have CPU-specific stats. */  	if (likely(stats)) {  		spin_lock(&stats->lock);  		/* Mark if we write on the pre-allocated stats. */ -		if (node == 0 && unlikely(flow->stats_last_writer != node)) -			flow->stats_last_writer = node; +		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) +			flow->stats_last_writer = cpu;  	} else {  		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */  		spin_lock(&stats->lock); -		/* If the current NUMA-node is the only writer on the +		/* If the current CPU is the only writer on the  		 * pre-allocated stats keep using them.  		 */ -		if (unlikely(flow->stats_last_writer != node)) { +		if (unlikely(flow->stats_last_writer != cpu)) {  			/* A previous locker may have already allocated the -			 * stats, so we need to check again.  If node-specific +			 * stats, so we need to check again.  If CPU-specific  			 * stats were already allocated, we update the pre-  			 * allocated stats as we have already locked them.  			 */ -			if (likely(flow->stats_last_writer != NUMA_NO_NODE) -			    && likely(!rcu_access_pointer(flow->stats[node]))) { -				/* Try to allocate node-specific stats. 
*/ +			if (likely(flow->stats_last_writer != -1) && +			    likely(!rcu_access_pointer(flow->stats[cpu]))) { +				/* Try to allocate CPU-specific stats. */  				struct flow_stats *new_stats;  				new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,  					new_stats->tcp_flags = tcp_flags;  					spin_lock_init(&new_stats->lock); -					rcu_assign_pointer(flow->stats[node], +					rcu_assign_pointer(flow->stats[cpu],  							   new_stats);  					goto unlock;  				}  			} -			flow->stats_last_writer = node; +			flow->stats_last_writer = cpu;  		}  	} @@ -136,14 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow,  			struct ovs_flow_stats *ovs_stats,  			unsigned long *used, __be16 *tcp_flags)  { -	int node; +	int cpu;  	*used = 0;  	*tcp_flags = 0;  	memset(ovs_stats, 0, sizeof(*ovs_stats)); -	for_each_node(node) { -		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); +	/* We open code this to make sure cpu 0 is always considered */ +	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { +		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);  		if (stats) {  			/* Local CPU may write on non-local stats, so we must @@ -163,10 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow,  /* Called with ovs_mutex. */  void ovs_flow_stats_clear(struct sw_flow *flow)  { -	int node; +	int cpu; -	for_each_node(node) { -		struct flow_stats *stats = ovsl_dereference(flow->stats[node]); +	/* We open code this to make sure cpu 0 is always considered */ +	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { +		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);  		if (stats) {  			spin_lock_bh(&stats->lock); @@ -302,24 +306,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb)  				  sizeof(struct icmp6hdr));  } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. 
+ * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)  { -	struct qtag_prefix { -		__be16 eth_type; /* ETH_P_8021Q */ -		__be16 tci; -	}; -	struct qtag_prefix *qp; +	struct vlan_head *vh = (struct vlan_head *)skb->data; + +	if (likely(!eth_type_vlan(vh->tpid))) +		return 0; -	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) +	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))  		return 0; -	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + -					 sizeof(__be16)))) +	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + +				 sizeof(__be16))))  		return -ENOMEM; -	qp = (struct qtag_prefix *) skb->data; -	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); -	__skb_pull(skb, sizeof(struct qtag_prefix)); +	vh = (struct vlan_head *)skb->data; +	key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); +	key_vh->tpid = vh->tpid; + +	__skb_pull(skb, sizeof(struct vlan_head)); +	return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ +	int res; + +	key->eth.vlan.tci = 0; +	key->eth.vlan.tpid = 0; +	key->eth.cvlan.tci = 0; +	key->eth.cvlan.tpid = 0; + +	if (likely(skb_vlan_tag_present(skb))) { +		key->eth.vlan.tci = htons(skb->vlan_tci); +		key->eth.vlan.tpid = skb->vlan_proto; +	} else { +		/* Parse outer vlan tag in the non-accelerated case. */ +		res = parse_vlan_tag(skb, &key->eth.vlan); +		if (res <= 0) +			return res; +	} + +	/* Parse inner vlan tag. */ +	res = parse_vlan_tag(skb, &key->eth.cvlan); +	if (res <= 0) +		return res;  	return 0;  } @@ -480,12 +517,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  	 * update skb->csum here.  	 
*/ -	key->eth.tci = 0; -	if (skb_vlan_tag_present(skb)) -		key->eth.tci = htons(skb->vlan_tci); -	else if (eth->h_proto == htons(ETH_P_8021Q)) -		if (unlikely(parse_vlan(skb, key))) -			return -ENOMEM; +	if (unlikely(parse_vlan(skb, key))) +		return -ENOMEM;  	key->eth.type = parse_ethertype(skb);  	if (unlikely(key->eth.type == htons(0))) @@ -600,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  	} else if (eth_p_mpls(key->eth.type)) {  		size_t stack_len = MPLS_HLEN; -		/* In the presence of an MPLS label stack the end of the L2 -		 * header and the beginning of the L3 header differ. -		 * -		 * Advance network_header to the beginning of the L3 -		 * header. mac_len corresponds to the end of the L2 header. -		 */ +		skb_set_inner_network_header(skb, skb->mac_len);  		while (1) {  			__be32 lse; @@ -613,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)  			if (unlikely(error))  				return 0; -			memcpy(&lse, skb_network_header(skb), MPLS_HLEN); +			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);  			if (stack_len == MPLS_HLEN)  				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); -			skb_set_network_header(skb, skb->mac_len + stack_len); +			skb_set_inner_network_header(skb, skb->mac_len + stack_len);  			if (lse & htonl(MPLS_LS_S_MASK))  				break; @@ -734,8 +762,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,  {  	int err; -	memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); -  	/* Extract metadata from netlink attributes. */  	err = ovs_nla_get_flow_metadata(net, attr, key, log);  	if (err) |