aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c4
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c7
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/net/inet_sock.h2
-rw-r--r--include/net/tcp.h5
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/devmap.c8
-rw-r--r--kernel/bpf/hashtab.c4
-rw-r--r--kernel/bpf/sockmap.c28
-rw-r--r--mm/percpu.c15
-rw-r--r--net/core/filter.c31
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c22
-rw-r--r--net/ncsi/ncsi-aen.c2
-rw-r--r--net/ncsi/ncsi-manage.c35
-rw-r--r--net/ncsi/ncsi-rsp.c2
-rw-r--r--net/packet/af_packet.c24
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/vmw_vsock/hyperv_transport.c22
-rw-r--r--samples/sockmap/sockmap_kern.c2
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/sockmap_verdict_prog.c4
-rw-r--r--tools/testing/selftests/bpf/test_maps.c12
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c16
30 files changed, 193 insertions, 116 deletions
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index b1212debc2e1..967020fb26ee 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -742,8 +742,8 @@ static void ena_get_channels(struct net_device *netdev,
{
struct ena_adapter *adapter = netdev_priv(netdev);
- channels->max_rx = ENA_MAX_NUM_IO_QUEUES;
- channels->max_tx = ENA_MAX_NUM_IO_QUEUES;
+ channels->max_rx = adapter->num_queues;
+ channels->max_tx = adapter->num_queues;
channels->max_other = 0;
channels->max_combined = 0;
channels->rx_count = adapter->num_queues;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index f7dc22f65d9f..c6bd5e24005d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -966,7 +966,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring,
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->rx_stats.bad_csum++;
u64_stats_update_end(&rx_ring->syncp);
- netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+ netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
"RX IPv4 header checksum error\n");
return;
}
@@ -979,7 +979,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring,
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->rx_stats.bad_csum++;
u64_stats_update_end(&rx_ring->syncp);
- netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+ netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
"RX L4 checksum error\n");
skb->ip_summed = CHECKSUM_NONE;
return;
@@ -3064,7 +3064,8 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
if (ena_dev->mem_bar)
devm_iounmap(&pdev->dev, ena_dev->mem_bar);
- devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+ if (ena_dev->reg_bar)
+ devm_iounmap(&pdev->dev, ena_dev->reg_bar);
release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
pci_release_selected_regions(pdev, release_bars);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d29e58fde364..818a0b26249e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
-struct sock *do_sk_redirect_map(void);
+struct sock *do_sk_redirect_map(struct sk_buff *skb);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index aa95053dfc78..425752f768d2 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -96,7 +96,7 @@ struct inet_request_sock {
kmemcheck_bitfield_end(flags);
u32 ir_mark;
union {
- struct ip_options_rcu *opt;
+ struct ip_options_rcu __rcu *ireq_opt;
#if IS_ENABLED(CONFIG_IPV6)
struct {
struct ipv6_txoptions *ipv6_opt;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 89974c5286d8..b1ef98ebce53 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -840,6 +840,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6;
#endif
} header; /* For incoming skbs */
+ struct {
+ __u32 key;
+ __u32 flags;
+ struct bpf_map *map;
+ } bpf;
};
};
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
if (array_size >= U32_MAX - PAGE_SIZE ||
- elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+ bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..52e0548ba548 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
+ if (!capable(CAP_NET_ADMIN))
+ return ERR_PTR(-EPERM);
+
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
/* A per cpu bitfield with a bit per possible net device */
- dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
- __alignof__(unsigned long));
+ dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
+ __alignof__(unsigned long),
+ GFP_KERNEL | __GFP_NOWARN);
if (!dtab->flush_needed)
goto free_dtab;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
goto free_htab;
- if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
- /* make sure the size for pcpu_alloc() is reasonable */
- goto free_htab;
-
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8);
if (percpu)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
+#include <net/tcp.h>
struct bpf_stab {
struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP;
skb_orphan(skb);
+ /* We need to ensure that BPF metadata for maps is also cleared
+ * when we orphan the skb so that we don't have the possibility
+ * to reference a stale map.
+ */
+ TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
bpf_compute_data_end(skb);
+ preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
+ preempt_enable();
skb->sk = NULL;
return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk;
int rc;
- /* Because we use per cpu values to feed input from sock redirect
- * in BPF program to do_sk_redirect_map() call we need to ensure we
- * are not preempted. RCU read lock is not sufficient in this case
- * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
- */
- preempt_disable();
rc = smap_verdict_func(psock, skb);
switch (rc) {
case SK_REDIRECT:
- sk = do_sk_redirect_map();
- preempt_enable();
+ sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */
case SK_DROP:
default:
- if (rc != SK_REDIRECT)
- preempt_enable();
kfree_skb(skb);
}
}
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
+ if (!capable(CAP_NET_ADMIN))
+ return ERR_PTR(-EPERM);
+
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
+ if (skops.sk->sk_type != SOCK_STREAM ||
+ skops.sk->sk_protocol != IPPROTO_TCP) {
+ fput(socket->file);
+ return -EOPNOTSUPP;
+ }
+
err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file);
return err;
diff --git a/mm/percpu.c b/mm/percpu.c
index aa121cef76de..a0e0c82c1e4c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1329,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
* @gfp: allocation flags
*
* Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't
- * contain %GFP_KERNEL, the allocation is atomic.
+ * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN
+ * then no warning will be triggered on invalid or failed allocation
+ * requests.
*
* RETURNS:
* Percpu pointer to the allocated area on success, NULL on failure.
@@ -1337,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
+ bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
+ bool do_warn = !(gfp & __GFP_NOWARN);
static int warn_limit = 10;
struct pcpu_chunk *chunk;
const char *err;
- bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
int slot, off, cpu, ret;
unsigned long flags;
void __percpu *ptr;
@@ -1361,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
!is_power_of_2(align))) {
- WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n",
+ WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
size, align);
return NULL;
}
@@ -1482,7 +1485,7 @@ fail_unlock:
fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
- if (!is_atomic && warn_limit) {
+ if (!is_atomic && do_warn && warn_limit) {
pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
size, align, is_atomic, err);
dump_stack();
@@ -1507,7 +1510,9 @@ fail:
*
* Allocate zero-filled percpu area of @size bytes aligned at @align. If
* @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
- * be called from any context but is a lot more likely to fail.
+ * be called from any context but is a lot more likely to fail. If @gfp
+ * has __GFP_NOWARN then no warning will be triggered on invalid or failed
+ * allocation requests.
*
* RETURNS:
* Percpu pointer to the allocated area on success, NULL on failure.
diff --git a/net/core/filter.c b/net/core/filter.c
index 74b8c91fb5f4..aa0265997f93 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+ struct bpf_map *, map, u32, key, u64, flags)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
if (unlikely(flags))
return SK_ABORTED;
- ri->ifindex = key;
- ri->flags = flags;
- ri->map = map;
+ tcb->bpf.key = key;
+ tcb->bpf.flags = flags;
+ tcb->bpf.map = map;
return SK_REDIRECT;
}
-struct sock *do_sk_redirect_map(void)
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL;
- if (ri->map) {
- sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
+ if (tcb->bpf.map) {
+ sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
- ri->ifindex = 0;
- ri->map = NULL;
- /* we do not clear flags for future lookup */
+ tcb->bpf.key = 0;
+ tcb->bpf.map = NULL;
}
return sk;
@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size,
{
if (type == BPF_WRITE) {
switch (off) {
- case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority):
break;
@@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size,
}
switch (off) {
+ case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_classid):
return false;
case bpf_ctx_range(struct __sk_buff, data):
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 001c08696334..0490916864f9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
- newinet->inet_opt = ireq->opt;
- ireq->opt = NULL;
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+ if (*own_req)
+ ireq->ireq_opt = NULL;
+ else
+ newinet->inet_opt = NULL;
return newsk;
exit_overflow:
@@ -441,6 +443,7 @@ exit:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ rcu_dereference(ireq->ireq_opt));
err = net_xmit_eval(err);
}
@@ -548,7 +551,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
void dccp_syn_ack_timeout(const struct request_sock *req)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54cb321..82178cc69c96 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1973,11 +1973,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 67aec7a10686..5ec9136a7c36 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -540,9 +540,10 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -576,10 +577,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -592,13 +592,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b1bb1b3a1082..77cf32a80952 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5d7656beeee..7eec3383702b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6196,7 +6196,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
struct inet_request_sock *ireq = inet_rsk(req);
kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 85164d4d3e53..4c43365c374c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ rcu_dereference(ireq->ireq_opt));
err = net_xmit_eval(err);
}
@@ -889,7 +889,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1265,10 +1265,11 @@ static void tcp_v4_init_req(struct request_sock *req,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = sock_net(sk_listener);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1355,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1403,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1416,6 +1419,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 6898e7229285..f135938bf781 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -187,7 +187,7 @@ static struct ncsi_aen_handler {
} ncsi_aen_handlers[] = {
{ NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc },
{ NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr },
- { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc }
+ { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc }
};
int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index b6a449aa9d4b..28c42b22b748 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -189,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data)
struct ncsi_channel *nc = (struct ncsi_channel *)data;
struct ncsi_package *np = nc->package;
struct ncsi_dev_priv *ndp = np->ndp;
+ struct ncsi_channel_mode *ncm;
struct ncsi_cmd_arg nca;
bool enabled, chained;
unsigned int monitor_state;
@@ -202,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data)
monitor_state = nc->monitor.state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!enabled || chained)
+ if (!enabled || chained) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
if (state != NCSI_CHANNEL_INACTIVE &&
- state != NCSI_CHANNEL_ACTIVE)
+ state != NCSI_CHANNEL_ACTIVE) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
switch (monitor_state) {
case NCSI_CHANNEL_MONITOR_START:
@@ -217,28 +222,28 @@ static void ncsi_channel_monitor(unsigned long data)
nca.type = NCSI_PKT_CMD_GLS;
nca.req_flags = 0;
ret = ncsi_xmit_cmd(&nca);
- if (ret) {
+ if (ret)
netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
ret);
- return;
- }
-
break;
case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
break;
default:
- if (!(ndp->flags & NCSI_DEV_HWA) &&
- state == NCSI_CHANNEL_ACTIVE) {
+ if (!(ndp->flags & NCSI_DEV_HWA)) {
ncsi_report_link(ndp, true);
ndp->flags |= NCSI_DEV_RESHUFFLE;
}
+ ncsi_stop_channel_monitor(nc);
+
+ ncm = &nc->modes[NCSI_MODE_LINK];
spin_lock_irqsave(&nc->lock, flags);
nc->state = NCSI_CHANNEL_INVISIBLE;
+ ncm->data[2] &= ~0x1;
spin_unlock_irqrestore(&nc->lock, flags);
spin_lock_irqsave(&ndp->lock, flags);
- nc->state = NCSI_CHANNEL_INACTIVE;
+ nc->state = NCSI_CHANNEL_ACTIVE;
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
ncsi_process_next_channel(ndp);
@@ -1002,12 +1007,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
struct ncsi_package *np;
struct ncsi_channel *nc;
unsigned int cap;
+ bool has_channel = false;
/* The hardware arbitration is disabled if any one channel
* doesn't support explicitly.
*/
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
+ has_channel = true;
+
cap = nc->caps[NCSI_CAP_GENERIC].cap;
if (!(cap & NCSI_CAP_GENERIC_HWA) ||
(cap & NCSI_CAP_GENERIC_HWA_MASK) !=
@@ -1018,8 +1026,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
}
}
- ndp->flags |= NCSI_DEV_HWA;
- return true;
+ if (has_channel) {
+ ndp->flags |= NCSI_DEV_HWA;
+ return true;
+ }
+
+ ndp->flags &= ~NCSI_DEV_HWA;
+ return false;
}
static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 265b9a892d41..927dad4759d1 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -959,7 +959,7 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf },
{ NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf },
{ NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc },
- { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi },
+ { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi },
{ NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
{ NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
{ NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bec01a3daf5b..2986941164b1 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1769,7 +1769,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
out:
if (err && rollover) {
- kfree(rollover);
+ kfree_rcu(rollover, rcu);
po->rollover = NULL;
}
mutex_unlock(&fanout_mutex);
@@ -1796,8 +1796,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
else
f = NULL;
- if (po->rollover)
+ if (po->rollover) {
kfree_rcu(po->rollover, rcu);
+ po->rollover = NULL;
+ }
}
mutex_unlock(&fanout_mutex);
@@ -3851,6 +3853,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3929,13 +3932,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- if (!po->rollover)
+ rcu_read_lock();
+ rollover = rcu_dereference(po->rollover);
+ if (rollover) {
+ rstats.tp_all = atomic_long_read(&rollover->num);
+ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ }
+ rcu_read_unlock();
+ if (!rollover)
return -EINVAL;
- rstats.tp_all = atomic_long_read(&po->rollover->num);
- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 92a07141fd07..34f10e75f3b9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
{
struct dst_entry *dst;
- if (!t)
+ if (sock_owned_by_user(sk) || !t)
return;
dst = sctp_transport_dst_check(t);
if (dst)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d4730ada7f32..17841ab30798 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4906,6 +4906,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;
+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
+ return -EINVAL;
+
if (!asoc)
return -EINVAL;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 14ed5a344cdf..e21991fe883a 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -310,11 +310,15 @@ static void hvs_close_connection(struct vmbus_channel *chan)
struct sock *sk = get_per_channel_state(chan);
struct vsock_sock *vsk = vsock_sk(sk);
+ lock_sock(sk);
+
sk->sk_state = SS_UNCONNECTED;
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
sk->sk_state_change(sk);
+
+ release_sock(sk);
}
static void hvs_open_connection(struct vmbus_channel *chan)
@@ -344,6 +348,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (!sk)
return;
+ lock_sock(sk);
+
if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
(!conn_from_host && sk->sk_state != SS_CONNECTING))
goto out;
@@ -395,9 +401,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vsock_insert_connected(vnew);
- lock_sock(sk);
vsock_enqueue_accept(sk, new);
- release_sock(sk);
} else {
sk->sk_state = SS_CONNECTED;
sk->sk_socket->state = SS_CONNECTED;
@@ -410,6 +414,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
out:
/* Release refcnt obtained when we called vsock_find_bound_socket() */
sock_put(sk);
+
+ release_sock(sk);
}
static u32 hvs_get_local_cid(void)
@@ -476,13 +482,21 @@ out:
static void hvs_release(struct vsock_sock *vsk)
{
+ struct sock *sk = sk_vsock(vsk);
struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan = hvs->chan;
+ struct vmbus_channel *chan;
+ lock_sock(sk);
+
+ sk->sk_state = SS_DISCONNECTING;
+ vsock_remove_sock(vsk);
+
+ release_sock(sk);
+
+ chan = hvs->chan;
if (chan)
hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
- vsock_remove_sock(vsk);
}
static void hvs_destruct(struct vsock_sock *vsk)
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index f9b38ef82dc2..52b0053274f4 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
- return bpf_sk_redirect_map(&sock_map, ret, 0);
+ return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}
SEC("sockops")
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 43ab5c402f98..be9a631a69f7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -569,9 +569,10 @@ union bpf_attr {
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
- * int bpf_sk_redirect_map(map, key, flags)
+ * int bpf_sk_redirect_map(skb, map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
+ * @skb: pointer to skb
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 36fb9161b34a..b2e02bdcd098 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
-static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) =
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index 9b99bd10807d..2cd2d552938b 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
if (!map)
- return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
- return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
+ return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index fe3a443a1102..50ce52d2013d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data)
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
- int err, i, fd, sfd[6] = {0xdeadbeef};
+ int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog;
struct sockaddr_in addr;
@@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
+ /* Test update with unsupported UDP socket */
+ udp = socket(AF_INET, SOCK_DGRAM, 0);
+ i = 0;
+ err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
+ if (!err) {
+ printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ i, udp);
+ goto out_sockmap;
+ }
+
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c7d3a45a3c5..50e15cedbb7f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access",
},
{
- "check skb->mark is writeable by SK_SKB",
+ "invalid access of skb->mark for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->mark is not writeable by SK_SKB",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .result = ACCEPT,
+ .result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
},
{
"check skb->tc_index is writeable by SK_SKB",