aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c57
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/core/dst.c23
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/net_namespace.c19
-rw-r--r--net/core/rtnetlink.c88
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock.c23
-rw-r--r--net/core/sysctl_net_core.c2
10 files changed, 152 insertions, 88 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 96cf83da0d66..fca407b4a6ea 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6852,6 +6852,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
+bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_QUERY_PROG;
+
+ /* Query must always succeed. */
+ WARN_ON(xdp_op(dev, &xdp) < 0);
+ return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ struct netlink_ext_ack *extack,
+ struct bpf_prog *prog)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_SETUP_PROG;
+ xdp.extack = extack;
+ xdp.prog = prog;
+
+ return xdp_op(dev, &xdp);
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -6864,41 +6890,34 @@ EXPORT_SYMBOL(dev_change_proto_down);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, u32 flags)
{
- int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp;
+ xdp_op_t xdp_op, xdp_chk;
int err;
ASSERT_RTNL();
- xdp_op = ops->ndo_xdp;
+ xdp_op = xdp_chk = ops->ndo_xdp;
+ if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE))
+ return -EOPNOTSUPP;
if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
xdp_op = generic_xdp_install;
+ if (xdp_op == xdp_chk)
+ xdp_chk = generic_xdp_install;
if (fd >= 0) {
- if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
-
- err = xdp_op(dev, &xdp);
- if (err < 0)
- return err;
- if (xdp.prog_attached)
- return -EBUSY;
- }
+ if (xdp_chk && __dev_xdp_attached(dev, xdp_chk))
+ return -EEXIST;
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+ __dev_xdp_attached(dev, xdp_op))
+ return -EBUSY;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_SETUP_PROG;
- xdp.extack = extack;
- xdp.prog = prog;
-
- err = xdp_op(dev, &xdp);
+ err = dev_xdp_install(dev, xdp_op, extack, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b0b87a292e7c..a0adfc31a3fe 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1680,8 +1680,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
@@ -2098,8 +2100,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..6192f11beec9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard_out);
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- [RTAX_MAX] = 0xdeadbeef,
+ .refcnt = ATOMIC_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
if (dev)
dev_hold(dev);
dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
+ dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
- u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+ struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
- u32 *old_p = __DST_METRICS_PTR(old);
+ struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+ atomic_set(&p->refcnt, 1);
+ memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
- p = __DST_METRICS_PTR(prev);
+ p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
+ } else if (prev & DST_METRICS_REFCOUNTED) {
+ if (atomic_dec_and_test(&old_p->refcnt))
+ kfree(old_p);
}
}
- return p;
+ BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+ return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
- new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+ new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
diff --git a/net/core/filter.c b/net/core/filter.c
index a253a6197e6b..a6bb95fa87b2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
+ func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 58b0bcc125b5..d274f81fcc2c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
lladdr = neigh->ha;
}
- if (new & NUD_CONNECTED)
- neigh->confirmed = jiffies;
- neigh->updated = jiffies;
-
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
@@ -1157,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
}
+ /* Update timestamps only once we know we will make a change to the
+ * neighbour entry. Otherwise we risk to move the locktime window with
+ * noop updates and ignore relevant ARP updates.
+ */
+ if (new != old || lladdr != neigh->ha) {
+ if (new & NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+ neigh->updated = jiffies;
+ }
+
if (new != old) {
neigh_del_timer(neigh);
if (new & NUD_PROBE)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd4a9d4..26bbfababff2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,6 +315,25 @@ out_undo:
goto out;
}
+static int __net_init net_defaults_init_net(struct net *net)
+{
+ net->core.sysctl_somaxconn = SOMAXCONN;
+ return 0;
+}
+
+static struct pernet_operations net_defaults_ops = {
+ .init = net_defaults_init_net,
+};
+
+static __init int net_defaults_init(void)
+{
+ if (register_pernet_subsys(&net_defaults_ops))
+ panic("Cannot initialize net default settings");
+
+ return 0;
+}
+
+core_initcall(net_defaults_init);
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bcb0f610ee42..9e2c0a7cb325 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev,
static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
- nla_total_size(1) + /* XDP_ATTACHED */
- nla_total_size(4); /* XDP_FLAGS */
+ nla_total_size(1); /* XDP_ATTACHED */
return xdp_size;
}
@@ -1247,37 +1246,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static u8 rtnl_xdp_attached_mode(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ ASSERT_RTNL();
+
+ if (rcu_access_pointer(dev->xdp_prog))
+ return XDP_ATTACHED_SKB;
+ if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp))
+ return XDP_ATTACHED_DRV;
+
+ return XDP_ATTACHED_NONE;
+}
+
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
{
struct nlattr *xdp;
- u32 xdp_flags = 0;
- u8 val = 0;
int err;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
- if (rcu_access_pointer(dev->xdp_prog)) {
- xdp_flags = XDP_FLAGS_SKB_MODE;
- val = 1;
- } else if (dev->netdev_ops->ndo_xdp) {
- struct netdev_xdp xdp_op = {};
-
- xdp_op.command = XDP_QUERY_PROG;
- err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
- if (err)
- goto err_cancel;
- val = xdp_op.prog_attached;
- }
- err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
+
+ err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
+ rtnl_xdp_attached_mode(dev));
if (err)
goto err_cancel;
- if (xdp_flags) {
- err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
- if (err)
- goto err_cancel;
- }
nla_nest_end(skb, xdp);
return 0;
@@ -1631,13 +1627,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
flags,
ext_filter_mask);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
@@ -1645,10 +1641,12 @@ cont:
}
}
out:
+ err = skb->len;
+out_err:
cb->args[1] = idx;
cb->args[0] = h;
- return skb->len;
+ return err;
}
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
@@ -2199,6 +2197,11 @@ static int do_setlink(const struct sk_buff *skb,
err = -EINVAL;
goto errout;
}
+ if ((xdp_flags & XDP_FLAGS_SKB_MODE) &&
+ (xdp_flags & XDP_FLAGS_DRV_MODE)) {
+ err = -EINVAL;
+ goto errout;
+ }
}
if (xdp[IFLA_XDP_FD]) {
@@ -3228,8 +3231,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err = 0;
int fidx = 0;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
- IFLA_MAX, ifla_policy, NULL) == 0) {
+ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, NULL);
+ if (err < 0) {
+ return -EINVAL;
+ } else if (err == 0) {
if (tb[IFLA_MASTER])
br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
@@ -3452,8 +3458,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
@@ -3464,16 +3474,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
seq, dev,
filter_mask,
NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
}
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline size_t bridge_nlmsg_size(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e85dfbc..b1be7c01efe2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3754,8 +3754,11 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
spin_lock_irqsave(&q->lock, flags);
skb = __skb_dequeue(q);
- if (skb && (skb_next = skb_peek(q)))
+ if (skb && (skb_next = skb_peek(q))) {
icmp_next = is_icmp_err_skb(skb_next);
+ if (icmp_next)
+ sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+ }
spin_unlock_irqrestore(&q->lock, flags);
if (is_icmp_err_skb(skb) && !icmp_next)
diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..727f924b7f91 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,10 +139,7 @@
#include <trace/events/sock.h>
-#ifdef CONFIG_INET
#include <net/tcp.h>
-#endif
-
#include <net/busy_poll.h>
static DEFINE_MUTEX(proto_list_mutex);
@@ -1803,28 +1800,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
* But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
*/
void skb_orphan_partial(struct sk_buff *skb)
{
- /* If this skb is a TCP pure ACK or already went here,
- * we have nothing to do. 2 is already a very small truesize.
- */
- if (skb->truesize <= 2)
+ if (skb_is_tcp_pure_ack(skb))
return;
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ea23254b2457..b7cd9aafe99e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
{
struct ctl_table *tbl;
- net->core.sysctl_somaxconn = SOMAXCONN;
-
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);