From 3c7eacfc8a9a4c2bd48e0093c4f43cf69afd5210 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 14 Feb 2014 11:42:36 +0100 Subject: ovs: fix dp check in ovs_dp_reset_user_features This fixes crash when userspace does "ovs-dpctl add-dp dev" where dev is existing non-dp netdevice. Introduced by: commit 44da5ae5fbea4686f667dc854e5ea16814e44c59 "openvswitch: Drop user features if old user space attempted to create datapath" Signed-off-by: Jiri Pirko Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e9a48baf8551..e42340d3f820 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1174,7 +1174,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in struct datapath *dp; dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); - if (!dp) + if (IS_ERR(dp)) return; WARN(dp->user_features, "Dropping previously announced user features\n"); -- cgit From 04382a3303c22b0c536fbd0c94c1f012f2b8ed60 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Sat, 15 Feb 2014 17:37:45 -0800 Subject: openvswitch: Read tcp flags only then the tranport header is present. Only the first IP fragment can have a TCP header, check for this. Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 16f4b46161d4..d71e60fa28cb 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -73,6 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.frag != OVS_FRAG_TYPE_LATER && flow->key.ip.proto == IPPROTO_TCP && likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); -- cgit From 42ee19e2939277a5277c307e517ce2d7ba5f0703 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Sat, 15 Feb 2014 17:42:29 -0800 Subject: openvswitch: Fix race. ovs_vport_cmd_dump() did rcu_read_lock() only after getting the datapath, which could have been deleted in between. Resolved by taking rcu_read_lock() before the get_dp() call. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e42340d3f820..8601b320b443 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1762,11 +1762,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int bucket = cb->args[0], skip = cb->args[1]; int i, j = 0; + rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) + if (!dp) { + rcu_read_unlock(); return -ENODEV; - - rcu_read_lock(); + } for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; -- cgit From 87536a81e1f52409b45333ce8cac415a1218163c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Mar 2014 12:44:18 +0100 Subject: net: af_key: fix sleeping under rcu There's a kmalloc with GFP_KERNEL in a helper (pfkey_sadb2xfrm_user_sec_ctx) used in pfkey_compile_policy which is called under rcu_read_lock. Adjust pfkey_sadb2xfrm_user_sec_ctx to have a gfp argument and adjust the users. CC: Dave Jones CC: Steffen Klassert CC: Fan Du CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert --- net/key/af_key.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 1a04c1329362..1526023f99ed 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -433,12 +433,13 @@ static inline int verify_sec_ctx_len(const void *p) return 0; } -static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx) +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, + gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; - uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); + uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; @@ -1124,7 +1125,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; @@ -2231,7 +2232,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; @@ -2335,7 +2336,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; @@ -3239,7 +3240,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, } if ((*dir = verify_sec_ctx_len(p))) goto out; - uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx); kfree(uctx); -- cgit From 52a4c6404f91f2d2c5592ee6365a8418c4565f53 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Mar 2014 12:44:19 +0100 Subject: selinux: add gfp argument to security_xfrm_policy_alloc and fix callers security_xfrm_policy_alloc can be called in atomic context so the allocation should be done with GFP_ATOMIC. Add an argument to let the callers choose the appropriate way. In order to do so a gfp argument needs to be added to the method xfrm_policy_alloc_security in struct security_operations and to the internal function selinux_xfrm_alloc_user. After that switch to GFP_ATOMIC in the atomic callers and leave GFP_KERNEL as before for the rest. The path that needed the gfp argument addition is: security_xfrm_policy_alloc -> security_ops.xfrm_policy_alloc_security -> all users of xfrm_policy_alloc_security (e.g. selinux_xfrm_policy_alloc) -> selinux_xfrm_alloc_user (here the allocation used to be GFP_KERNEL only) Now adding a gfp argument to selinux_xfrm_alloc_user requires us to also add it to security_context_to_sid which is used inside and prior to this patch did only GFP_KERNEL allocation. So add gfp argument to security_context_to_sid and adjust all of its callers as well. CC: Paul Moore CC: Dave Jones CC: Steffen Klassert CC: Fan Du CC: David S. Miller CC: LSM list CC: SELinux list Signed-off-by: Nikolay Aleksandrov Acked-by: Paul Moore Signed-off-by: Steffen Klassert --- include/linux/security.h | 10 +++++++--- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_user.c | 6 +++--- security/capability.c | 3 ++- security/security.c | 6 ++++-- security/selinux/hooks.c | 13 +++++++------ security/selinux/include/security.h | 2 +- security/selinux/include/xfrm.h | 3 ++- security/selinux/selinuxfs.c | 28 ++++++++++++++++++---------- security/selinux/ss/services.c | 6 ++++-- security/selinux/xfrm.c | 14 ++++++++------ 11 files changed, 59 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/include/linux/security.h b/include/linux/security.h index 5623a7f965b7..2fc42d191f79 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1040,6 +1040,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Allocate a security structure to the xp->security field; the security * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation * @xfrm_policy_clone_security: * @old_ctx contains an existing xfrm_sec_ctx. * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. @@ -1683,7 +1684,7 @@ struct security_operations { #ifdef CONFIG_SECURITY_NETWORK_XFRM int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx); + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); @@ -2859,7 +2860,8 @@ static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx); +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx); int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); @@ -2877,7 +2879,9 @@ void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); #else /* CONFIG_SECURITY_NETWORK_XFRM */ -static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 1526023f99ed..79326978517a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2239,7 +2239,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; } - err = security_xfrm_policy_alloc(&xp->security, uctx); + err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) @@ -2341,7 +2341,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (!uctx) return -ENOMEM; - err = security_xfrm_policy_alloc(&pol_ctx, uctx); + err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; @@ -3241,7 +3241,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); - *dir = security_xfrm_policy_alloc(&xp->security, uctx); + *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c274179d60a2..2f7ddc3a59b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1221,7 +1221,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs return 0; uctx = nla_data(rt); - return security_xfrm_policy_alloc(&pol->security, uctx); + return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -1626,7 +1626,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } @@ -1928,7 +1928,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } diff --git a/security/capability.c b/security/capability.c index 8b4f24ae4338..21e2b9cae685 100644 --- a/security/capability.c +++ b/security/capability.c @@ -757,7 +757,8 @@ static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM static int cap_xfrm_policy_alloc_security(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx) + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { return 0; } diff --git a/security/security.c b/security/security.c index 15b6928592ef..919cad93ac82 100644 --- a/security/security.c +++ b/security/security.c @@ -1317,9 +1317,11 @@ void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { - return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx, gfp); } EXPORT_SYMBOL(security_xfrm_policy_alloc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4b34847208cc..b332e2cc0954 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -668,7 +668,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, if (flags[i] == SBLABEL_MNT) continue; rc = security_context_to_sid(mount_options[i], - strlen(mount_options[i]), &sid); + strlen(mount_options[i]), &sid, GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2489,7 +2489,8 @@ static int selinux_sb_remount(struct super_block *sb, void *data) if (flags[i] == SBLABEL_MNT) continue; len = strlen(mount_options[i]); - rc = security_context_to_sid(mount_options[i], len, &sid); + rc = security_context_to_sid(mount_options[i], len, &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2893,7 +2894,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - rc = security_context_to_sid(value, size, &newsid); + rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); if (rc == -EINVAL) { if (!capable(CAP_MAC_ADMIN)) { struct audit_buffer *ab; @@ -3050,7 +3051,7 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name, if (!value || !size) return -EACCES; - rc = security_context_to_sid((void *)value, size, &newsid); + rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL); if (rc) return rc; @@ -5529,7 +5530,7 @@ static int selinux_setprocattr(struct task_struct *p, str[size-1] = 0; size--; } - error = security_context_to_sid(value, size, &sid); + error = security_context_to_sid(value, size, &sid, GFP_KERNEL); if (error == -EINVAL && !strcmp(name, "fscreate")) { if (!capable(CAP_MAC_ADMIN)) { struct audit_buffer *ab; @@ -5638,7 +5639,7 @@ static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { - return security_context_to_sid(secdata, seclen, secid); + return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); } static void selinux_release_secctx(char *secdata, u32 seclen) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 8ed8daf7f1ee..ce7852cf526b 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -134,7 +134,7 @@ int security_sid_to_context(u32 sid, char **scontext, int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len); int security_context_to_sid(const char *scontext, u32 scontext_len, - u32 *out_sid); + u32 *out_sid, gfp_t gfp); int security_context_to_sid_default(const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 48c3cc94c168..9f0584710c85 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -10,7 +10,8 @@ #include int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx); + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp); int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 5122affe06a8..d60c0ee66387 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -576,7 +576,7 @@ static ssize_t sel_write_context(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_context_to_sid(buf, size, &sid); + length = security_context_to_sid(buf, size, &sid, GFP_KERNEL); if (length) goto out; @@ -731,11 +731,13 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -817,11 +819,13 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) objname = namebuf; } - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -878,11 +882,13 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -934,7 +940,7 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s", con, user) != 2) goto out; - length = security_context_to_sid(con, strlen(con) + 1, &sid); + length = security_context_to_sid(con, strlen(con) + 1, &sid, GFP_KERNEL); if (length) goto out; @@ -994,11 +1000,13 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 5d0144ee8ed6..4bca49414a40 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1289,16 +1289,18 @@ out: * @scontext: security context * @scontext_len: length in bytes * @sid: security identifier, SID + * @gfp: context for the allocation * * Obtains a SID associated with the security context that * has the string representation specified by @scontext. * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid) +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid, + gfp_t gfp) { return security_context_to_sid_core(scontext, scontext_len, - sid, SECSID_NULL, GFP_KERNEL, 0); + sid, SECSID_NULL, gfp, 0); } /** diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 0462cb3ff0a7..98b042630a9e 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -78,7 +78,8 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) * xfrm_user_sec_ctx context. */ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx) + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp) { int rc; const struct task_security_struct *tsec = current_security(); @@ -94,7 +95,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, if (str_len >= PAGE_SIZE) return -ENOMEM; - ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL); + ctx = kmalloc(sizeof(*ctx) + str_len + 1, gfp); if (!ctx) return -ENOMEM; @@ -103,7 +104,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_len = str_len; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; - rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid); + rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid, gfp); if (rc) goto err; @@ -282,9 +283,10 @@ int selinux_xfrm_skb_sid(struct sk_buff *skb, u32 *sid) * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy. */ int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx) + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp) { - return selinux_xfrm_alloc_user(ctxp, uctx); + return selinux_xfrm_alloc_user(ctxp, uctx, gfp); } /* @@ -332,7 +334,7 @@ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) { - return selinux_xfrm_alloc_user(&x->security, uctx); + return selinux_xfrm_alloc_user(&x->security, uctx, GFP_KERNEL); } /* -- cgit From e367c2d03dba4c9bcafad24688fadb79dd95b218 Mon Sep 17 00:00:00 2001 From: lucien Date: Mon, 17 Mar 2014 12:51:01 +0800 Subject: ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly In ip6_append_data_mtu(), when the xfrm mode is not tunnel(such as transport),the ipsec header need to be added in the first fragment, so the mtu will decrease to reserve space for it, then the second fragment come, the mtu should be turn back, as the commit 0c1833797a5a6ec23ea9261d979aa18078720b74 said. however, in the commit a493e60ac4bbe2e977e7129d6d8cbb0dd236be, it use *mtu = min(*mtu, ...) to change the mtu, which lead to the new mtu is alway equal with the first fragment's. and cannot turn back. when I test through ping6 -c1 -s5000 $ip (mtu=1280): ...frag (0|1232) ESP(spi=0x00002000,seq=0xb), length 1232 ...frag (1232|1216) ...frag (2448|1216) ...frag (3664|1216) ...frag (4880|164) which should be: ...frag (0|1232) ESP(spi=0x00001000,seq=0x1), length 1232 ...frag (1232|1232) ...frag (2464|1232) ...frag (3696|1232) ...frag (4928|116) so delete the min() when change back the mtu. Signed-off-by: Xin Long Fixes: 75a493e60ac4bb ("ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size") Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16f91a2e7888..64d6073731d3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1101,21 +1101,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1132,7 +1130,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1214,6 +1212,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1311,8 +1310,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc >= - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; -- cgit From f9b8c4c8baded129535d82d74df8e87a7a369f54 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 20 Mar 2014 10:45:21 -0700 Subject: openvswitch: Correctly report flow used times for first 5 minutes after boot. The kernel starts out its "jiffies" timer as 5 minutes below zero, as shown in include/linux/jiffies.h: /* * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) The loop in ovs_flow_stats_get() starts out with 'used' set to 0, then takes any "later" time. This means that for the first five minutes after boot, flows will always be reported as never used, since 0 is greater than any time already seen. Signed-off-by: Ben Pfaff Acked-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index d71e60fa28cb..dda451f4429c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -92,7 +92,7 @@ static void stats_read(struct flow_stats *stats, unsigned long *used, __be16 *tcp_flags) { spin_lock(&stats->lock); - if (time_after(stats->used, *used)) + if (!*used || time_after(stats->used, *used)) *used = stats->used; *tcp_flags |= stats->tcp_flags; ovs_stats->n_packets += stats->packet_count; -- cgit From 1c104a6bebf3c16b6248408b84f91d09ac8a26b6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:49 +0100 Subject: rtnetlink: fix fdb notification flags Commit 3ff661c38c84 ("net: rtnetlink notify events for FDB NTF_SELF adds and deletes") reuses the function nlmsg_populate_fdb_fill() to notify fdb events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a0dac2ef9ad..120eecc0f5a4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2121,12 +2121,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u32 pid, u32 seq, - int type, unsigned int flags) + int type, unsigned int flags, + int nlflags) { struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; @@ -2164,7 +2165,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2389,7 +2390,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, portid, seq, - RTM_NEWNEIGH, NTF_SELF); + RTM_NEWNEIGH, NTF_SELF, + NLM_F_MULTI); if (err < 0) return err; skip: -- cgit From 65886f439ab0fdc2dff20d1fa87afb98c6717472 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:50 +0100 Subject: ipmr: fix mfc notification flags Commit 8cd3ac9f9b7b ("ipmr: advertise new mfc entries via rtnl") reuses the function ipmr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b9b3472975ba..28863570dd60 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2255,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc_cache *c, int cmd) + u32 portid, u32 seq, struct mfc_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2329,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, if (skb == NULL) goto errout; - err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2368,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2382,7 +2384,8 @@ next_entry: if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit From f518338b16038beeb73e155e60d0f70beb9379f4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:51 +0100 Subject: ip6mr: fix mfc notification flags Commit 812e44dd1829 ("ip6mr: advertise new mfc entries via rtnl") reuses the function ip6mr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0eb4038a4d63..8737400af0a0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2476,7 +2478,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit From c27f0872a3448c46e561e226b5b97f77187b06d2 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 21 Mar 2014 20:53:57 +0800 Subject: netpoll: fix the skb check in pkt_is_ns Neighbor Solicitation is ipv6 protocol, so we should check skb->protocol with ETH_P_IPV6 Signed-off-by: Li RongQing Cc: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a664f7829a6d..df9e6b1a9759 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -742,7 +742,7 @@ static bool pkt_is_ns(struct sk_buff *skb) struct nd_msg *msg; struct ipv6hdr *hdr; - if (skb->protocol != htons(ETH_P_ARP)) + if (skb->protocol != htons(ETH_P_IPV6)) return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) return false; -- cgit From a5d0e7c037119484a7006b883618bfa87996cb41 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 24 Mar 2014 16:56:38 +0100 Subject: tipc: fix spinlock recursion bug for failed subscriptions If a topology event subscription fails for any reason, such as out of memory, max number reached or because we received an invalid request the correct behavior is to terminate the subscribers connection to the topology server. This is currently broken and produces the following oops: [27.953662] tipc: Subscription rejected, illegal request [27.955329] BUG: spinlock recursion on CPU#1, kworker/u4:0/6 [27.957066] lock: 0xffff88003c67f408, .magic: dead4ead, .owner: kworker/u4:0/6, .owner_cpu: 1 [27.958054] CPU: 1 PID: 6 Comm: kworker/u4:0 Not tainted 3.14.0-rc6+ #5 [27.960230] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [27.960874] Workqueue: tipc_rcv tipc_recv_work [tipc] [27.961430] ffff88003c67f408 ffff88003de27c18 ffffffff815c0207 ffff88003de1c050 [27.962292] ffff88003de27c38 ffffffff815beec5 ffff88003c67f408 ffffffff817f0a8a [27.963152] ffff88003de27c58 ffffffff815beeeb ffff88003c67f408 ffffffffa0013520 [27.964023] Call Trace: [27.964292] [] dump_stack+0x45/0x56 [27.964874] [] spin_dump+0x8c/0x91 [27.965420] [] spin_bug+0x21/0x26 [27.965995] [] do_raw_spin_lock+0x116/0x140 [27.966631] [] _raw_spin_lock_bh+0x15/0x20 [27.967256] [] subscr_conn_shutdown_event+0x20/0xa0 [tipc] [27.968051] [] tipc_close_conn+0xa4/0xb0 [tipc] [27.968722] [] tipc_conn_terminate+0x1a/0x30 [tipc] [27.969436] [] subscr_conn_msg_event+0x1f2/0x2f0 [tipc] [27.970209] [] tipc_receive_from_sock+0x90/0xf0 [tipc] [27.970972] [] tipc_recv_work+0x29/0x50 [tipc] [27.971633] [] process_one_work+0x165/0x3e0 [27.972267] [] worker_thread+0x119/0x3a0 [27.972896] [] ? manage_workers.isra.25+0x2a0/0x2a0 [27.973622] [] kthread+0xdf/0x100 [27.974168] [] ? kthread_create_on_node+0x1a0/0x1a0 [27.974893] [] ret_from_fork+0x7c/0xb0 [27.975466] [] ? kthread_create_on_node+0x1a0/0x1a0 The recursion occurs when subscr_terminate tries to grab the subscriber lock, which is already taken by subscr_conn_msg_event. We fix this by checking if the request to establish a new subscription was successful, and if not we initiate termination of the subscriber after we have released the subscriber lock. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/subscr.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 11c9ae00837d..642437231ad5 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -263,9 +263,9 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. */ -static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) -{ +static int subscr_subscribe(struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) { struct tipc_subscription *sub; int swap; @@ -276,23 +276,21 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); subscr_cancel(s, subscriber); - return NULL; + return 0; } /* Refuse subscription if global limit exceeded */ if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); - subscr_terminate(subscriber); - return NULL; + return -ENOMEM; } /* Initialize subscription object */ @@ -306,8 +304,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (sub->seq.lower > sub->seq.upper)) { pr_warn("Subscription rejected, illegal request\n"); kfree(sub); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); @@ -320,8 +317,8 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (Handler)subscr_timeout, (unsigned long)sub); k_start_timer(&sub->timer, sub->timeout); } - - return sub; + *sub_p = sub; + return 0; } /* Handle one termination request for the subscriber */ @@ -335,10 +332,14 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscription *sub; + struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + spin_unlock_bh(&subscriber->lock); + subscr_terminate(subscriber); + return; + } if (sub) tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); -- cgit From fbd02dd405d0724a0f25897ed4a6813297c9b96f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 23 Mar 2014 22:06:36 -0700 Subject: ip_tunnel: Fix dst ref-count. Commit 10ddceb22ba (ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer) removed dst-drop call from ip-tunnel-recv. Following commit reintroduce dst-drop and fix the original bug by checking loopback packet before releasing dst. Original bug: https://bugzilla.kernel.org/show_bug.cgi?id=70681 CC: Xin Long Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 8 ++++++++ net/ipv4/ip_tunnel.c | 3 --- net/ipv4/ip_tunnel_core.c | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 1863422fb7d5..250be7421ab3 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -182,6 +182,14 @@ static int gre_cisco_rcv(struct sk_buff *skb) int i; bool csum_err = false; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + } +#endif + if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 78a89e61925d..a82a22d8f77f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -416,9 +416,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6f847dd56dbc..8d69626f2206 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -108,6 +108,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); skb_clear_hash_if_not_l4(skb); + skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; -- cgit From de1443916791d75fdd26becb116898277bb0273f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Mar 2014 18:42:27 -0700 Subject: net: unix: non blocking recvmsg() should not return -EINTR Some applications didn't expect recvmsg() on a non blocking socket could return -EINTR. This possibility was added as a side effect of commit b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines"). To hit this bug, you need to be a bit unlucky, as the u->readlock mutex is usually held for very small periods. Fixes: b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines") Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller --- net/unix/af_unix.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ce6ec6c2f4de..94404f19f9de 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1787,8 +1787,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } @@ -1913,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; @@ -1928,7 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1940,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } -- cgit From fc0d48b8fb449ca007b2057328abf736cb516168 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 26 Mar 2014 11:47:56 -0400 Subject: vlan: Set hard_header_len according to available acceleration Currently, if the card supports CTAG acceleration we do not account for the vlan header even if we are configuring an 8021AD vlan. This may not be best since we'll do software tagging for 8021AD which will cause data copy on skb head expansion Configure the length based on available hw offload capabilities and vlan protocol. CC: Patrick McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/8021q/vlan.c | 4 +++- net/8021q/vlan_dev.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ec9909935fb6..175273f38cb1 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -307,9 +307,11 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); + vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4b65aa492fb6..a9591ff2b678 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -592,7 +592,8 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { + if (vlan_hw_offload_capable(real_dev->features, + vlan_dev_priv(dev)->vlan_proto)) { dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { -- cgit From 36d5fe6a000790f56039afe26834265db0a3ad4c Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Wed, 26 Mar 2014 22:37:45 +0000 Subject: core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors skb_zerocopy can copy elements of the frags array between skbs, but it doesn't orphan them. Also, it doesn't handle errors, so this patch takes care of that as well, and modify the callers accordingly. skb_tx_error() is also added to the callers so they will signal the failed delivery towards the creator of the skb. Signed-off-by: Zoltan Kiss Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- net/core/skbuff.c | 27 ++++++++++++++++++++------- net/netfilter/nfnetlink_queue_core.c | 9 +++++++-- net/openvswitch/datapath.c | 6 +++++- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5e1e6f2d98c2..15ede6a823a6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2451,8 +2451,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, - int len, int hlen); +int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 869c7afe3b07..97e5a2c3d947 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2127,25 +2127,31 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. + * + * Return value: + * 0: everything is OK + * -ENOMEM: couldn't orphan frags of @from due to lack of memory + * -EFAULT: skb_copy_bits() found some problem with skb geometry */ -void -skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +int +skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ + int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } + if (len <= skb_tailroom(to)) + return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + if (unlikely(ret)) + return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); @@ -2163,6 +2169,11 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) to->len += len + plen; to->data_len += len + plen; + if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { + skb_tx_error(from); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) break; @@ -2173,6 +2184,8 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) j++; } skb_shinfo(to)->nr_frags = j; + + return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index f072fe803510..108120f216b1 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -354,13 +354,16 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); - if (!skb) + if (!skb) { + skb_tx_error(entskb); return NULL; + } nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); if (!nlh) { + skb_tx_error(entskb); kfree_skb(skb); return NULL; } @@ -488,13 +491,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - skb_zerocopy(skb, entskb, data_len, hlen); + if (skb_zerocopy(skb, entskb, data_len, hlen)) + goto nla_put_failure; } nlh->nlmsg_len = skb->len; return skb; nla_put_failure: + skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8601b320b443..270b77dfac30 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,7 +464,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); - skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { @@ -478,6 +480,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); kfree_skb(nskb); return err; } -- cgit From 12464bb8de021a01fa7ec9299c273c247df7f198 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:55 +0900 Subject: bridge: Fix inabillity to retrieve vlan tags when tx offload is disabled Bridge vlan code (br_vlan_get_tag()) assumes that all frames have vlan_tci if they are tagged, but if vlan tx offload is manually disabled on bridge device and frames are sent from vlan device on the bridge device, the tags are embedded in skb->data and they break this assumption. Extract embedded vlan tags and move them to vlan_tci at ingress. Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_device.c | 6 +++--- net/bridge/br_vlan.c | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 63f0455c0bc3..8fe8b71b487a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,14 +49,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) - goto out; - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 8249ca764c79..44f31af0b965 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -174,6 +174,18 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!vlan_tx_tag_present(skb) && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)))) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + err = br_vlan_get_tag(skb, vid); if (!*vid) { u16 pvid = br_get_pvid(v); -- cgit From 99b192da9c99284ad3374132e56f66995cadc6b4 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:56 +0900 Subject: bridge: Fix handling stacked vlan tags If a bridge with vlan_filtering enabled receives frames with stacked vlan tags, i.e., they have two vlan tags, br_vlan_untag() strips not only the outer tag but also the inner tag. br_vlan_untag() is called only from br_handle_vlan(), and in this case, it is enough to set skb->vlan_tci to 0 here, because vlan_tci has already been set before calling br_handle_vlan(). Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 44f31af0b965..c77eed56b045 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -119,22 +119,6 @@ static void __vlan_flush(struct net_port_vlans *v) kfree_rcu(v, rcu); } -/* Strip the tag from the packet. Will return skb with tci set 0. */ -static struct sk_buff *br_vlan_untag(struct sk_buff *skb) -{ - if (skb->protocol != htons(ETH_P_8021Q)) { - skb->vlan_tci = 0; - return skb; - } - - skb->vlan_tci = 0; - skb = vlan_untag(skb); - if (skb) - skb->vlan_tci = 0; - - return skb; -} - struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *pv, struct sk_buff *skb) @@ -150,7 +134,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) - skb = br_vlan_untag(skb); + skb->vlan_tci = 0; out: return skb; -- cgit From 4f647e0a3c37b8d5086214128614a136064110c3 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Mar 2014 11:05:34 -0300 Subject: openvswitch: fix a possible deadlock and lockdep warning There are two problematic situations. A deadlock can happen when is_percpu is false because it can get interrupted while holding the spinlock. Then it executes ovs_flow_stats_update() in softirq context which tries to get the same lock. The second sitation is that when is_percpu is true, the code correctly disables BH but only for the local CPU, so the following can happen when locking the remote CPU without disabling BH: CPU#0 CPU#1 ovs_flow_stats_get() stats_read() +->spin_lock remote CPU#1 ovs_flow_stats_get() | stats_read() | ... +--> spin_lock remote CPU#0 | | | ovs_flow_stats_update() | ... | spin_lock local CPU#0 <--+ ovs_flow_stats_update() +---------------------------------- spin_lock local CPU#1 This patch disables BH for both cases fixing the deadlocks. Acked-by: Jesse Gross ================================= [ INFO: inconsistent lock state ] 3.14.0-rc8-00007-g632b06a #1 Tainted: G I --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes: (&(&cpu_stats->lock)->rlock){+.?...}, at: [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x68f/0x1c40 [] lock_acquire+0xa2/0x1d0 [] _raw_spin_lock+0x3e/0x80 [] ovs_flow_stats_get+0xc4/0x1e0 [openvswitch] [] ovs_flow_cmd_fill_info+0x185/0x360 [openvswitch] [] ovs_flow_cmd_build_info.constprop.27+0x55/0x90 [openvswitch] [] ovs_flow_cmd_new_or_set+0x4dd/0x570 [openvswitch] [] genl_family_rcv_msg+0x1cd/0x3f0 [] genl_rcv_msg+0x8e/0xd0 [] netlink_rcv_skb+0xa9/0xc0 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x100/0x1e0 [] netlink_sendmsg+0x347/0x770 [] sock_sendmsg+0x9c/0xe0 [] ___sys_sendmsg+0x3a9/0x3c0 [] __sys_sendmsg+0x51/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x16/0x1b irq event stamp: 1740726 hardirqs last enabled at (1740726): [] ip6_finish_output2+0x4f0/0x840 hardirqs last disabled at (1740725): [] ip6_finish_output2+0x4ab/0x840 softirqs last enabled at (1740674): [] _local_bh_enable+0x22/0x50 softirqs last disabled at (1740675): [] irq_exit+0xc5/0xd0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cpu_stats->lock)->rlock); lock(&(&cpu_stats->lock)->rlock); *** DEADLOCK *** 5 locks held by swapper/0/0: #0: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x5/0x320 #1: (rcu_read_lock){.+.+..}, at: [] mld_sendpack+0x5/0x4a0 #2: (rcu_read_lock_bh){.+....}, at: [] ip6_finish_output2+0x59/0x840 #3: (rcu_read_lock_bh){.+....}, at: [] __dev_queue_xmit+0x5/0x9b0 #4: (rcu_read_lock){.+.+..}, at: [] internal_dev_xmit+0x5/0x110 [openvswitch] stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G I 3.14.0-rc8-00007-g632b06a #1 Hardware name: /DX58SO, BIOS SOX5810J.86A.5599.2012.0529.2218 05/29/2012 0000000000000000 0fcf20709903df0c ffff88042d603808 ffffffff817cfe3c ffffffff81c134c0 ffff88042d603858 ffffffff817cb6da 0000000000000005 ffffffff00000001 ffff880400000000 0000000000000006 ffffffff81c134c0 Call Trace: [] dump_stack+0x4d/0x66 [] print_usage_bug+0x1f4/0x205 [] ? check_usage_backwards+0x180/0x180 [] mark_lock+0x223/0x2b0 [] __lock_acquire+0x623/0x1c40 [] ? __lock_is_held+0x57/0x80 [] ? masked_flow_lookup+0x236/0x250 [openvswitch] [] lock_acquire+0xa2/0x1d0 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] _raw_spin_lock+0x3e/0x80 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_dp_process_received_packet+0x84/0x120 [openvswitch] [] ? __lock_acquire+0x347/0x1c40 [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] internal_dev_xmit+0x68/0x110 [openvswitch] [] ? internal_dev_xmit+0x5/0x110 [openvswitch] [] dev_hard_start_xmit+0x2e6/0x8b0 [] __dev_queue_xmit+0x417/0x9b0 [] ? __dev_queue_xmit+0x5/0x9b0 [] ? ip6_finish_output2+0x4f0/0x840 [] dev_queue_xmit+0x10/0x20 [] ip6_finish_output2+0x551/0x840 [] ? ip6_finish_output+0x9a/0x220 [] ip6_finish_output+0x9a/0x220 [] ip6_output+0x4f/0x1f0 [] mld_sendpack+0x1d9/0x4a0 [] mld_send_initial_cr.part.32+0x88/0xa0 [] ? addrconf_dad_completed+0x220/0x220 [] ipv6_mc_dad_complete+0x31/0x50 [] addrconf_dad_completed+0x147/0x220 [] ? addrconf_dad_completed+0x220/0x220 [] addrconf_dad_timer+0x19f/0x1c0 [] call_timer_fn+0x99/0x320 [] ? call_timer_fn+0x5/0x320 [] ? addrconf_dad_completed+0x220/0x220 [] run_timer_softirq+0x254/0x3b0 [] __do_softirq+0x12d/0x480 Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dda451f4429c..2998989e76db 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -103,30 +103,24 @@ static void stats_read(struct flow_stats *stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu, cur_cpu; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); + local_bh_disable(); if (!flow->stats.is_percpu) { stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); } else { - cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { struct flow_stats *stats; - if (cpu == cur_cpu) - local_bh_disable(); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static void stats_reset(struct flow_stats *stats) @@ -141,25 +135,17 @@ static void stats_reset(struct flow_stats *stats) void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu, cur_cpu; + int cpu; + local_bh_disable(); if (!flow->stats.is_percpu) { stats_reset(flow->stats.stat); } else { - cur_cpu = get_cpu(); - for_each_possible_cpu(cpu) { - - if (cpu == cur_cpu) - local_bh_disable(); - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) -- cgit From e2a1d3e47bb904082b758dec9d07edf241c45d05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2014 07:19:19 -0700 Subject: tcp: fix get_timewait4_sock() delay computation on 64bit It seems I missed one change in get_timewait4_sock() to compute the remaining time before deletion of IPV4 timewait socket. This could result in wrong output in /proc/net/tcp for tm->when field. Fixes: 96f817fedec4 ("tcp: shrink tcp6_timewait_sock by one cache line") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf976510497..1e4eac779f51 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2628,7 +2628,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; -- cgit From c15b1ccadb323ea50023e8f1cca2954129a62b51 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Mar 2014 18:28:07 +0100 Subject: ipv6: move DAD and addrconf_verify processing to workqueue addrconf_join_solict and addrconf_join_anycast may cause actions which need rtnl locked, especially on first address creation. A new DAD state is introduced which defers processing of the initial DAD processing into a workqueue. To get rtnl lock we need to push the code paths which depend on those calls up to workqueues, specifically addrconf_verify and the DAD processing. (v2) addrconf_dad_failure needs to be queued up to the workqueue, too. This patch introduces a new DAD state and stop the DAD processing in the workqueue (this is because of the possible ipv6_del_addr processing which removes the solicited multicast address from the device). addrconf_verify_lock is removed, too. After the transition it is not needed any more. As we are not processing in bottom half anymore we need to be a bit more careful about disabling bottom half out when we lock spin_locks which are also used in bh. Relevant backtrace: [ 541.030090] RTNL: assertion failed at net/core/dev.c (4496) [ 541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 3.10.33-1-amd64-vyatta #1 [ 541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 541.031146] ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8 [ 541.031148] 0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18 [ 541.031150] 0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000 [ 541.031152] Call Trace: [ 541.031153] [] ? dump_stack+0xd/0x17 [ 541.031180] [] ? __dev_set_promiscuity+0x101/0x180 [ 541.031183] [] ? __hw_addr_create_ex+0x60/0xc0 [ 541.031185] [] ? __dev_set_rx_mode+0xaa/0xc0 [ 541.031189] [] ? __dev_mc_add+0x61/0x90 [ 541.031198] [] ? igmp6_group_added+0xfc/0x1a0 [ipv6] [ 541.031208] [] ? kmem_cache_alloc+0xcb/0xd0 [ 541.031212] [] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6] [ 541.031216] [] ? addrconf_join_solict+0x2e/0x40 [ipv6] [ 541.031219] [] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6] [ 541.031223] [] ? addrconf_join_anycast+0x92/0xa0 [ipv6] [ 541.031226] [] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6] [ 541.031229] [] ? ipv6_ifa_notify+0x33/0x50 [ipv6] [ 541.031233] [] ? addrconf_dad_completed+0x28/0x100 [ipv6] [ 541.031241] [] ? task_cputime+0x2d/0x50 [ 541.031244] [] ? addrconf_dad_timer+0x136/0x150 [ipv6] [ 541.031247] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] [ 541.031255] [] ? call_timer_fn.isra.22+0x2a/0x90 [ 541.031258] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] Hunks and backtrace stolen from a patch by Stephen Hemminger. Reported-by: Stephen Hemminger Signed-off-by: Stephen Hemminger Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/if_inet6.h | 4 +- net/ipv6/addrconf.c | 193 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 145 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 9650a3ffd2d2..b4956a5fcc3f 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -31,8 +31,10 @@ #define IF_PREFIX_AUTOCONF 0x02 enum { + INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, + INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_UP, INET6_IFADDR_STATE_DEAD, }; @@ -58,7 +60,7 @@ struct inet6_ifaddr { unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ - struct timer_list dad_timer; + struct delayed_work dad_work; struct inet6_dev *idev; struct rt6_info *rt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 344e972426df..6c7fa0853fc7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) __in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; unsigned long expires; + ASSERT_RTNL(); + spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1604,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1625,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1671,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. */ @@ -1680,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1691,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1701,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1712,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -2271,11 +2291,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2326,7 +2348,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2475,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -3011,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3049,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3148,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3203,25 +3225,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) +{ + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3232,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3240,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); } /* ifp->idev must be at least read locked */ @@ -3276,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. @@ -3517,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -3628,13 +3694,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3691,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, bool was_managetempaddr; bool had_prefixroute; + ASSERT_RTNL(); + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3756,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4386,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; + ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4434,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4636,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -5244,6 +5326,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5274,7 +5362,7 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); rtnl_af_register(&inet6_ops); @@ -5302,6 +5390,8 @@ errout: rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5337,7 +5427,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } -- cgit From 53d6471cef17262d3ad1c7ce8982a234244f68ec Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:26:18 -0400 Subject: net: Account for all vlan headers in skb_mac_gso_segment skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 13 +++++++++---- net/core/skbuff.c | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8eeebd49a98..daafd9561cbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3014,7 +3014,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d4d7df..45fa2f11f84d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2286,7 +2286,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2313,6 +2313,8 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + *depth = vlan_depth; + return type; } @@ -2326,12 +2328,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2498,8 +2501,10 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, const struct net_device *dev, netdev_features_t features) { + int tmp; + if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97e5a2c3d947..90b96a11b974 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2879,8 +2879,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; + int dummy; - proto = skb_network_protocol(head_skb); + proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); -- cgit From fc92f745f8d0d3736ce5afb00a905d7cc61f9c46 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 21:51:18 -0400 Subject: bridge: Fix crash with vlan filtering and tcpdump When the vlan filtering is enabled on the bridge, but the filter is not configured on the bridge device itself, running tcpdump on the bridge device will result in a an Oops with NULL pointer dereference. The reason is that br_pass_frame_up() will bypass the vlan check because promisc flag is set. It will then try to get the table pointer and process the packet based on the table. Since the table pointer is NULL, we oops. Catch this special condition in br_handle_vlan(). Reported-by: Toshiaki Makita CC: Toshiaki Makita Signed-off-by: Vlad Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_input.c | 11 ++++++----- net/bridge/br_vlan.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 28d544627422..d0cca3c65f01 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; @@ -39,18 +40,18 @@ static int br_pass_frame_up(struct sk_buff *skb) * packet is allowed except in promisc modue when someone * may be running packet capture. */ + pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + !br_allowed_egress(br, pv, skb)) { kfree_skb(skb); return NET_RX_DROP; } - skb = br_handle_vlan(br, br_get_vlan_info(br), skb); - if (!skb) - return NET_RX_DROP; - indev = skb->dev; skb->dev = brdev; + skb = br_handle_vlan(br, pv, skb); + if (!skb) + return NET_RX_DROP; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index c77eed56b045..f23c74b3a953 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -128,6 +128,20 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (!br->vlan_enabled) goto out; + /* Vlan filter table must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!pv) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send tagged. -- cgit From 2adb956b084d6d49f519541a4b5f9947e96f8ef7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:49 -0400 Subject: vlan: Warn the user if lowerdev has bad vlan features. Some drivers incorrectly assign vlan acceleration features to vlan_features thus causing issues for Q-in-Q vlan configurations. Warn the user of such cases. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 7 +++++++ net/8021q/vlan_dev.c | 3 +++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1005ebf17575..5a09a48f2658 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -163,4 +163,11 @@ enum { /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) +#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX) + #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9591ff2b678..27bfe2f8e2de 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -578,6 +578,9 @@ static int vlan_dev_init(struct net_device *dev) dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; + if (dev->features & NETIF_F_VLAN_FEATURES) + netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; -- cgit