aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c194
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/filter.c534
-rw-r--r--net/core/flow_dissector.c171
-rw-r--r--net/core/lwtunnel.c35
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net_namespace.c55
-rw-r--r--net/core/pktgen.c38
-rw-r--r--net/core/rtnetlink.c324
-rw-r--r--net/core/skbuff.c150
-rw-r--r--net/core/sock.c43
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/stream.c1
13 files changed, 1109 insertions, 442 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ea6312057a71..6666b28b6815 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
+ int ret = ____dev_forward_skb(dev, skb);
- skb_scrub_packet(skb, true);
- skb->priority = 0;
- skb->protocol = eth_type_trans(skb, dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ if (likely(!ret)) {
+ skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(__dev_forward_skb);
@@ -2484,7 +2479,7 @@ int skb_checksum_help(struct sk_buff *skb)
goto out;
}
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
@@ -3035,6 +3030,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
}
return head;
}
+EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -3355,16 +3351,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
-#ifdef CONFIG_NET_SWITCHDEV
- /* Don't forward if offload device already forwarded */
- if (skb->offload_fwd_mark &&
- skb->offload_fwd_mark == dev->offload_fwd_mark) {
- consume_skb(skb);
- rc = NET_XMIT_SUCCESS;
- goto out;
- }
-#endif
-
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -3855,7 +3841,7 @@ int netif_rx_ni(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_rx_ni);
-static void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -3914,8 +3900,7 @@ static void net_tx_action(struct softirq_action *h)
}
}
-#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
- (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
unsigned char *addr) __read_mostly;
@@ -4066,12 +4051,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
{
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
+ int ingress_retval;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- return nf_hook_ingress(skb);
+ rcu_read_lock();
+ ingress_retval = nf_hook_ingress(skb);
+ rcu_read_unlock();
+ return ingress_retval;
}
#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
@@ -4308,32 +4298,53 @@ int netif_receive_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_receive_skb);
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+DEFINE_PER_CPU(struct work_struct, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
{
- struct net_device *dev = arg;
- struct softnet_data *sd = this_cpu_ptr(&softnet_data);
struct sk_buff *skb, *tmp;
+ struct softnet_data *sd;
+ local_bh_disable();
+ sd = this_cpu_ptr(&softnet_data);
+
+ local_irq_disable();
rps_lock(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
}
}
rps_unlock(sd);
+ local_irq_enable();
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
}
}
+ local_bh_enable();
+}
+
+static void flush_all_backlogs(void)
+{
+ unsigned int cpu;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu)
+ queue_work_on(cpu, system_highpri_wq,
+ per_cpu_ptr(&flush_works, cpu));
+
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(&flush_works, cpu));
+
+ put_online_cpus();
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -4496,6 +4507,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
+ NAPI_GRO_CB(skb)->recursion_counter = 0;
NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
@@ -4821,8 +4833,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
static int process_backlog(struct napi_struct *napi, int quota)
{
- int work = 0;
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+ bool again = true;
+ int work = 0;
/* Check if we have pending ipi, its better to send them now,
* not waiting net_rx_action() end.
@@ -4833,23 +4846,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
napi->weight = weight_p;
- local_irq_disable();
- while (1) {
+ while (again) {
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
rcu_read_lock();
- local_irq_enable();
__netif_receive_skb(skb);
rcu_read_unlock();
- local_irq_disable();
input_queue_head_incr(sd);
- if (++work >= quota) {
- local_irq_enable();
+ if (++work >= quota)
return work;
- }
+
}
+ local_irq_disable();
rps_lock(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
@@ -4861,16 +4871,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
* and we dont need an smp_mb() memory barrier.
*/
napi->state = 0;
- rps_unlock(sd);
-
- break;
+ again = false;
+ } else {
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
}
-
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
rps_unlock(sd);
+ local_irq_enable();
}
- local_irq_enable();
return work;
}
@@ -5187,7 +5195,7 @@ out_unlock:
return work;
}
-static void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
@@ -5500,10 +5508,14 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
{
struct netdev_adjacent *lower;
- lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
- struct netdev_adjacent, list);
+ lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
- return lower ? lower->dev : NULL;
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+
+ return lower->dev;
}
EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
@@ -5578,6 +5590,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
+ u16 ref_nr,
struct list_head *dev_list,
void *private, bool master)
{
@@ -5587,7 +5600,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
- adj->ref_nr++;
+ adj->ref_nr += ref_nr;
return 0;
}
@@ -5597,7 +5610,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->dev = adj_dev;
adj->master = master;
- adj->ref_nr = 1;
+ adj->ref_nr = ref_nr;
adj->private = private;
dev_hold(adj_dev);
@@ -5636,6 +5649,7 @@ free_adj:
static void __netdev_adjacent_dev_remove(struct net_device *dev,
struct net_device *adj_dev,
+ u16 ref_nr,
struct list_head *dev_list)
{
struct netdev_adjacent *adj;
@@ -5648,10 +5662,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
BUG();
}
- if (adj->ref_nr > 1) {
- pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
- adj->ref_nr-1);
- adj->ref_nr--;
+ if (adj->ref_nr > ref_nr) {
+ pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
+ ref_nr, adj->ref_nr-ref_nr);
+ adj->ref_nr -= ref_nr;
return;
}
@@ -5670,21 +5684,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
struct net_device *upper_dev,
+ u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list,
void *private, bool master)
{
int ret;
- ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
- master);
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+ private, master);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
- false);
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+ private, false);
if (ret) {
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
return ret;
}
@@ -5692,9 +5707,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
}
static int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ u16 ref_nr)
{
- return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
&dev->all_adj_list.upper,
&upper_dev->all_adj_list.lower,
NULL, false);
@@ -5702,17 +5718,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev,
static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
struct net_device *upper_dev,
+ u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list)
{
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
- __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+ __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
}
static void __netdev_adjacent_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ u16 ref_nr)
{
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
&dev->all_adj_list.upper,
&upper_dev->all_adj_list.lower);
}
@@ -5721,17 +5739,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
struct net_device *upper_dev,
void *private, bool master)
{
- int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+ int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower,
private, master);
if (ret) {
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
return ret;
}
@@ -5741,8 +5759,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_adjacent_dev_unlink(dev, upper_dev);
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower);
}
@@ -5795,7 +5813,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
pr_debug("Interlinking %s with %s, non-neighbour\n",
i->dev->name, j->dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
if (ret)
goto rollback_mesh;
}
@@ -5805,7 +5823,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
pr_debug("linking %s's upper device %s with %s\n",
upper_dev->name, i->dev->name, dev->name);
- ret = __netdev_adjacent_dev_link(dev, i->dev);
+ ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
if (ret)
goto rollback_upper_mesh;
}
@@ -5814,7 +5832,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(i, &dev->all_adj_list.lower, list) {
pr_debug("linking %s's lower device %s with %s\n", dev->name,
i->dev->name, upper_dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
if (ret)
goto rollback_lower_mesh;
}
@@ -5832,7 +5850,7 @@ rollback_lower_mesh:
list_for_each_entry(i, &dev->all_adj_list.lower, list) {
if (i == to_i)
break;
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
}
i = NULL;
@@ -5842,7 +5860,7 @@ rollback_upper_mesh:
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
if (i == to_i)
break;
- __netdev_adjacent_dev_unlink(dev, i->dev);
+ __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
}
i = j = NULL;
@@ -5854,7 +5872,7 @@ rollback_mesh:
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
if (i == to_i && j == to_j)
break;
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
}
if (i == to_i)
break;
@@ -5934,16 +5952,16 @@ void netdev_upper_dev_unlink(struct net_device *dev,
*/
list_for_each_entry(i, &dev->all_adj_list.lower, list)
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
/* remove also the devices itself from lower/upper device
* list
*/
list_for_each_entry(i, &dev->all_adj_list.lower, list)
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(dev, i->dev);
+ __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
@@ -6723,8 +6741,8 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
- on_each_cpu(flush_backlog, dev, 1);
}
+ flush_all_backlogs();
synchronize_net();
@@ -7641,6 +7659,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->all_adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+ hash_init(dev->qdisc_hash);
+#endif
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -8286,8 +8307,11 @@ static int __init net_dev_init(void)
*/
for_each_possible_cpu(i) {
+ struct work_struct *flush = per_cpu_ptr(&flush_works, i);
struct softnet_data *sd = &per_cpu(softnet_data, i);
+ INIT_WORK(flush, flush_backlog);
+
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
INIT_LIST_HEAD(&sd->poll_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d6b3b579560d..72cfb0c61125 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -105,7 +105,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
return skb;
}
-static struct genl_multicast_group dropmon_mcgrps[] = {
+static const struct genl_multicast_group dropmon_mcgrps[] = {
{ .name = "events", },
};
diff --git a/net/core/filter.c b/net/core/filter.c
index cb06aceb512a..b391209838ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -94,14 +94,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
}
EXPORT_SYMBOL(sk_filter_trim_cap);
-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
- return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+ return skb_get_poff(skb);
}
-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -120,9 +119,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return 0;
}
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -145,7 +143,7 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return 0;
}
-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_0(__get_raw_cpu_id)
{
return raw_smp_processor_id();
}
@@ -233,9 +231,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_HATYPE:
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
- BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
BPF_REG_TMP, BPF_REG_CTX,
offsetof(struct sk_buff, dev));
/* if (tmp != 0) goto pc + 1 */
@@ -1350,17 +1347,26 @@ struct bpf_scratchpad {
static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ return skb_ensure_writable(skb, write_len);
+}
+
static inline int bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
- int err;
+ int err = __bpf_try_make_writable(skb, write_len);
- err = skb_ensure_writable(skb, write_len);
bpf_compute_data_end(skb);
-
return err;
}
+static int bpf_try_make_head_writable(struct sk_buff *skb)
+{
+ return bpf_try_make_writable(skb, skb_headlen(skb));
+}
+
static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
if (skb_at_tc_ingress(skb))
@@ -1373,12 +1379,9 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+ const void *, from, u32, len, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- unsigned int offset = (unsigned int) r2;
- void *from = (void *) (long) r3;
- unsigned int len = (unsigned int) r4;
void *ptr;
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
@@ -1413,12 +1416,9 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+ void *, to, u32, len)
{
- const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
- unsigned int offset = (unsigned int) r2;
- void *to = (void *)(unsigned long) r3;
- unsigned int len = (unsigned int) r4;
void *ptr;
if (unlikely(offset > 0xffff))
@@ -1446,10 +1446,31 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_STACK_SIZE,
};
-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto bpf_skb_pull_data_proto = {
+ .func = bpf_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
+ u64, from, u64, to, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- unsigned int offset = (unsigned int) r2;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
@@ -1491,12 +1512,11 @@ static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+ u64, from, u64, to, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
- unsigned int offset = (unsigned int) r2;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
@@ -1544,12 +1564,11 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
-static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
+BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
+ __be32 *, to, u32, to_size, __wsum, seed)
{
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
- u64 diff_size = from_size + to_size;
- __be32 *from = (__be32 *) (long) r1;
- __be32 *to = (__be32 *) (long) r3;
+ u32 diff_size = from_size + to_size;
int i, j = 0;
/* This is quite flexible, some examples:
@@ -1575,6 +1594,7 @@ static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
static const struct bpf_func_proto bpf_csum_diff_proto = {
.func = bpf_csum_diff,
.gpl_only = false,
+ .pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_STACK,
.arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
@@ -1583,11 +1603,44 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
+{
+ /* The interface is to be used in combination with bpf_csum_diff()
+ * for direct packet writes. csum rotation for alignment as well
+ * as emulating csum_sub() can be done from the eBPF program.
+ */
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ return (skb->csum = csum_add(skb->csum, csum));
+
+ return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_csum_update_proto = {
+ .func = bpf_csum_update,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
return dev_forward_skb(dev, skb);
}
+static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ int ret = ____dev_forward_skb(dev, skb);
+
+ if (likely(!ret)) {
+ skb->dev = dev;
+ ret = netif_rx(skb);
+ }
+
+ return ret;
+}
+
static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
int ret;
@@ -1607,10 +1660,56 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
return ret;
}
-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ /* skb->mac_len is not set on normal egress */
+ unsigned int mlen = skb->network_header - skb->mac_header;
+
+ __skb_pull(skb, mlen);
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ skb_pop_mac_header(skb);
+ skb_reset_mac_len(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ bpf_push_mac_rcsum(skb);
+ return flags & BPF_F_INGRESS ?
+ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
+ u32 flags)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ return __bpf_redirect_no_mac(skb, dev, flags);
+ default:
+ return __bpf_redirect_common(skb, dev, flags);
+ }
+}
+
+BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct net_device *dev;
+ struct sk_buff *clone;
+ int ret;
if (unlikely(flags & ~(BPF_F_INGRESS)))
return -EINVAL;
@@ -1619,14 +1718,22 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- skb = skb_clone(skb, GFP_ATOMIC);
- if (unlikely(!skb))
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!clone))
return -ENOMEM;
- bpf_push_mac_rcsum(skb);
+ /* For direct write, we need to keep the invariant that the skbs
+ * we're dealing with need to be uncloned. Should uncloning fail
+ * here, we need to free the just generated clone to unclone once
+ * again.
+ */
+ ret = bpf_try_make_head_writable(skb);
+ if (unlikely(ret)) {
+ kfree_skb(clone);
+ return -ENOMEM;
+ }
- return flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+ return __bpf_redirect(clone, dev, flags);
}
static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1645,7 +1752,7 @@ struct redirect_info {
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
@@ -1670,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb)
return -EINVAL;
}
- bpf_push_mac_rcsum(skb);
-
- return ri->flags & BPF_F_INGRESS ?
- __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+ return __bpf_redirect(skb, dev, ri->flags);
}
static const struct bpf_func_proto bpf_redirect_proto = {
@@ -1684,9 +1788,9 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
- return task_get_classid((struct sk_buff *) (unsigned long) r1);
+ return task_get_classid(skb);
}
static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
@@ -1696,9 +1800,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
{
- return dst_tclassid((struct sk_buff *) (unsigned long) r1);
+ return dst_tclassid(skb);
}
static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1708,14 +1812,14 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
{
/* If skb_clear_hash() was called due to mangling, we can
* trigger SW recalculation here. Later access to hash
* can then use the inline skb->hash via context directly
* instead of calling this helper again.
*/
- return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+ return skb_get_hash(skb);
}
static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
@@ -1725,10 +1829,25 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
+{
+ /* After all direct packet write, this can be used once for
+ * triggering a lazy recalc on next skb_get_hash() invocation.
+ */
+ skb_clear_hash(skb);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
+ .func = bpf_set_hash_invalid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
+ u16, vlan_tci)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- __be16 vlan_proto = (__force __be16) r2;
int ret;
if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
@@ -1753,9 +1872,8 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = {
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
int ret;
bpf_push_mac_rcsum(skb);
@@ -1930,10 +2048,9 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
return -ENOTSUPP;
}
-static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
+ u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- __be16 proto = (__force __be16) r2;
int ret;
if (unlikely(flags))
@@ -1970,14 +2087,11 @@ static const struct bpf_func_proto bpf_skb_change_proto_proto = {
.arg3_type = ARG_ANYTHING,
};
-static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u32 pkt_type = r2;
-
/* We only allow a restricted subset to be changed for now. */
- if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
- pkt_type > PACKET_OTHERHOST))
+ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+ !skb_pkt_type_ok(pkt_type)))
return -EINVAL;
skb->pkt_type = pkt_type;
@@ -1992,19 +2106,100 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+ u32 min_len = skb_network_offset(skb);
+
+ if (skb_transport_header_was_set(skb))
+ min_len = skb_transport_offset(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ min_len = skb_checksum_start_offset(skb) +
+ skb->csum_offset + sizeof(__sum16);
+ return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+ return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ unsigned int old_len = skb->len;
+ int ret;
+
+ ret = __skb_grow_rcsum(skb, new_len);
+ if (!ret)
+ memset(skb->data + old_len, 0, new_len - old_len);
+ return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+ return __skb_trim_rcsum(skb, new_len);
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ u32 max_len = __bpf_skb_max_len(skb);
+ u32 min_len = __bpf_skb_min_len(skb);
+ int ret;
+
+ if (unlikely(flags || new_len > max_len || new_len < min_len))
+ return -EINVAL;
+ if (skb->encapsulation)
+ return -ENOTSUPP;
+
+ /* The basic idea of this helper is that it's performing the
+ * needed work to either grow or trim an skb, and eBPF program
+ * rewrites the rest via helpers like bpf_skb_store_bytes(),
+ * bpf_lX_csum_replace() and others rather than passing a raw
+ * buffer here. This one is a slow path helper and intended
+ * for replies with control messages.
+ *
+ * Like in bpf_skb_change_proto(), we want to keep this rather
+ * minimal and without protocol specifics so that we are able
+ * to separate concerns as in bpf_skb_store_bytes() should only
+ * be the one responsible for writing buffers.
+ *
+ * It's really expected to be a slow path operation here for
+ * control message replies, so we're implicitly linearizing,
+ * uncloning and drop offloads from the skb by this.
+ */
+ ret = __bpf_try_make_writable(skb, skb->len);
+ if (!ret) {
+ if (new_len > skb->len)
+ ret = bpf_skb_grow_rcsum(skb, new_len);
+ else if (new_len < skb->len)
+ ret = bpf_skb_trim_rcsum(skb, new_len);
+ if (!ret && skb_is_gso(skb))
+ skb_gso_reset(skb);
+ }
+
+ bpf_compute_data_end(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+ .func = bpf_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
bool bpf_helper_changes_skb_data(void *func)
{
- if (func == bpf_skb_vlan_push)
- return true;
- if (func == bpf_skb_vlan_pop)
- return true;
- if (func == bpf_skb_store_bytes)
- return true;
- if (func == bpf_skb_change_proto)
- return true;
- if (func == bpf_l3_csum_replace)
- return true;
- if (func == bpf_l4_csum_replace)
+ if (func == bpf_skb_vlan_push ||
+ func == bpf_skb_vlan_pop ||
+ func == bpf_skb_store_bytes ||
+ func == bpf_skb_change_proto ||
+ func == bpf_skb_change_tail ||
+ func == bpf_skb_pull_data ||
+ func == bpf_l3_csum_replace ||
+ func == bpf_l4_csum_replace)
return true;
return false;
@@ -2023,13 +2218,10 @@ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
return 0;
}
-static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
- u64 meta_size)
+BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
+ u64, flags, void *, meta, u64, meta_size)
{
- struct sk_buff *skb = (struct sk_buff *)(long) r1;
- struct bpf_map *map = (struct bpf_map *)(long) r2;
u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
- void *meta = (void *)(long) r4;
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
@@ -2056,10 +2248,9 @@ static unsigned short bpf_tunnel_key_af(u64 flags)
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
}
-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
+ u32, size, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
u8 compat[sizeof(struct bpf_tunnel_key)];
void *to_orig = to;
@@ -2124,10 +2315,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
-static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u8 *to = (u8 *) (long) r2;
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
int err;
@@ -2162,10 +2351,9 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
static struct metadata_dst __percpu *md_dst;
-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
+ const struct bpf_tunnel_key *, from, u32, size, u64, flags)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
struct metadata_dst *md = this_cpu_ptr(md_dst);
u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
@@ -2183,7 +2371,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
*/
memcpy(compat, from, size);
memset(compat + size, 0, sizeof(compat) - size);
- from = (struct bpf_tunnel_key *)compat;
+ from = (const struct bpf_tunnel_key *) compat;
break;
default:
return -EINVAL;
@@ -2233,10 +2421,9 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
-static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+ const u8 *, from, u32, size)
{
- struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u8 *from = (u8 *) (long) r2;
struct ip_tunnel_info *info = skb_tunnel_info(skb);
const struct metadata_dst *md = this_cpu_ptr(md_dst);
@@ -2282,28 +2469,24 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
}
}
-#ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
+ u32, idx)
{
- struct sk_buff *skb = (struct sk_buff *)(long)r1;
- struct bpf_map *map = (struct bpf_map *)(long)r2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
struct sock *sk;
- u32 i = (u32)r3;
- sk = skb->sk;
+ sk = skb_to_full_sk(skb);
if (!sk || !sk_fullsock(sk))
return -ENOENT;
-
- if (unlikely(i >= array->map.max_entries))
+ if (unlikely(idx >= array->map.max_entries))
return -E2BIG;
- cgrp = READ_ONCE(array->ptrs[i]);
+ cgrp = READ_ONCE(array->ptrs[idx]);
if (unlikely(!cgrp))
return -EAGAIN;
- return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+ return sk_under_cgroup_hierarchy(sk, cgrp);
}
static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
@@ -2314,7 +2497,38 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
};
-#endif
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+ unsigned long off, unsigned long len)
+{
+ memcpy(dst_buff, src_buff + off, len);
+ return 0;
+}
+
+BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+ u64, flags, void *, meta, u64, meta_size)
+{
+ u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+ return -EINVAL;
+ if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ return -EFAULT;
+
+ return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+ bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+ .func = bpf_xdp_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_STACK,
+ .arg5_type = ARG_CONST_STACK_SIZE,
+};
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
@@ -2350,8 +2564,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_store_bytes_proto;
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
case BPF_FUNC_csum_diff:
return &bpf_csum_diff_proto;
+ case BPF_FUNC_csum_update:
+ return &bpf_csum_update_proto;
case BPF_FUNC_l3_csum_replace:
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
@@ -2368,6 +2586,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_change_proto_proto;
case BPF_FUNC_skb_change_type:
return &bpf_skb_change_type_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
@@ -2382,14 +2602,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
return &bpf_get_hash_recalc_proto;
+ case BPF_FUNC_set_hash_invalid:
+ return &bpf_set_hash_invalid_proto;
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
-#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
-#endif
default:
return sk_filter_func_proto(func_id);
}
@@ -2398,7 +2618,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
xdp_func_proto(enum bpf_func_id func_id)
{
- return sk_filter_func_proto(func_id);
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_xdp_event_output_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
}
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
@@ -2438,6 +2665,45 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ if (!direct_write)
+ return 0;
+
+ /* if (!skb->cloned)
+ * goto start;
+ *
+ * (Fast-path, otherwise approximation that we might be
+ * a clone, do the rest in helper.)
+ */
+ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
+
+ /* ret = bpf_skb_pull_data(skb, 0); */
+ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
+ *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_pull_data);
+ /* if (!ret)
+ * goto restore;
+ * return TC_ACT_SHOT;
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
+ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+ *insn++ = BPF_EXIT_INSN();
+
+ /* restore: */
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ /* start: */
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
enum bpf_reg_type *reg_type)
@@ -2475,7 +2741,7 @@ static bool __is_valid_xdp_access(int off, int size,
return false;
if (off % size != 0)
return false;
- if (size != 4)
+ if (size != sizeof(__u32))
return false;
return true;
@@ -2506,10 +2772,10 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -2556,7 +2822,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
case offsetof(struct __sk_buff, ifindex):
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
dst_reg, src_reg,
offsetof(struct sk_buff, dev));
*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
@@ -2597,7 +2863,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
dst_reg, src_reg, insn);
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]):
+ offsetof(struct __sk_buff, cb[4]):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
prog->cb_access = 1;
@@ -2621,7 +2887,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
break;
case offsetof(struct __sk_buff, data):
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
dst_reg, src_reg,
offsetof(struct sk_buff, data));
break;
@@ -2630,8 +2896,8 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
ctx_off -= offsetof(struct __sk_buff, data_end);
ctx_off += offsetof(struct sk_buff, cb);
ctx_off += offsetof(struct bpf_skb_data_end, data_end);
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
- dst_reg, src_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
+ ctx_off);
break;
case offsetof(struct __sk_buff, tc_index):
@@ -2657,6 +2923,31 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct __sk_buff, ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+ default:
+ return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
+ ctx_off, insn_buf, prog);
+ }
+
+ return insn - insn_buf;
+}
+
static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -2666,12 +2957,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
switch (ctx_off) {
case offsetof(struct xdp_md, data):
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
dst_reg, src_reg,
offsetof(struct xdp_buff, data));
break;
case offsetof(struct xdp_md, data_end):
- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)),
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
dst_reg, src_reg,
offsetof(struct xdp_buff, data_end));
break;
@@ -2683,13 +2974,14 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
- .convert_ctx_access = bpf_net_convert_ctx_access,
+ .convert_ctx_access = tc_cls_act_convert_ctx_access,
+ .gen_prologue = tc_cls_act_prologue,
};
static const struct bpf_verifier_ops xdp_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 52742a02814f..69e4463a4b1b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -116,13 +118,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
+ struct flow_dissector_key_vlan *key_vlan;
struct flow_dissector_key_keyid *key_keyid;
+ bool skip_vlan = false;
u8 ip_proto = 0;
- bool ret = false;
+ bool ret;
if (!data) {
data = skb->data;
- proto = skb->protocol;
+ proto = skb_vlan_tag_present(skb) ?
+ skb->vlan_proto : skb->protocol;
nhoff = skb_network_offset(skb);
hlen = skb_headlen(skb);
}
@@ -242,22 +247,42 @@ ipv6:
case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
+ bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
- vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
- if (!vlan)
- goto out_bad;
+ if (vlan_tag_present)
+ proto = skb->protocol;
+
+ if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
+ vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
+ data, hlen, &_vlan);
+ if (!vlan)
+ goto out_bad;
+ proto = vlan->h_vlan_encapsulated_proto;
+ nhoff += sizeof(*vlan);
+ if (skip_vlan)
+ goto again;
+ }
+ skip_vlan = true;
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_VLANID)) {
- key_tags = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_VLANID,
+ FLOW_DISSECTOR_KEY_VLAN)) {
+ key_vlan = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
target_container);
- key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+ if (vlan_tag_present) {
+ key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
+ key_vlan->vlan_priority =
+ (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+ } else {
+ key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
+ VLAN_VID_MASK;
+ key_vlan->vlan_priority =
+ (ntohs(vlan->h_vlan_TCI) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
}
- proto = vlan->h_vlan_encapsulated_proto;
- nhoff += sizeof(*vlan);
goto again;
}
case htons(ETH_P_PPP_SES): {
@@ -338,32 +363,42 @@ mpls:
ip_proto_again:
switch (ip_proto) {
case IPPROTO_GRE: {
- struct gre_hdr {
- __be16 flags;
- __be16 proto;
- } *hdr, _hdr;
+ struct gre_base_hdr *hdr, _hdr;
+ u16 gre_ver;
+ int offset = 0;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
goto out_bad;
- /*
- * Only look inside GRE if version zero and no
- * routing
- */
- if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
break;
- proto = hdr->proto;
- nhoff += 4;
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ break;
+
+ proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ break;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
if (hdr->flags & GRE_CSUM)
- nhoff += 4;
+ offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+ sizeof(((struct gre_full_hdr *)0)->reserved1);
+
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
__be32 _keyid;
- keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+ keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
data, hlen, &_keyid);
-
if (!keyid)
goto out_bad;
@@ -372,32 +407,65 @@ ip_proto_again:
key_keyid = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_GRE_KEYID,
target_container);
- key_keyid->keyid = *keyid;
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- nhoff += 4;
+ offset += sizeof(((struct gre_full_hdr *)0)->key);
}
+
if (hdr->flags & GRE_SEQ)
- nhoff += 4;
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
+ offset += sizeof(((struct pptp_gre_header *)0)->seq);
+
+ if (gre_ver == 0) {
+ if (proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, nhoff + offset,
+ sizeof(_eth),
+ data, hlen, &_eth);
+ if (!eth)
+ goto out_bad;
+ proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ hlen = (nhoff + offset);
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+ ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+ sizeof(_ppp_hdr), _ppp_hdr);
+ if (!ppp_hdr)
goto out_bad;
- proto = eth->h_proto;
- nhoff += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = nhoff;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
}
+ nhoff += offset;
key_control->flags |= FLOW_DIS_ENCAPSULATION;
if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
goto out_good;
@@ -481,12 +549,17 @@ ip_proto_again:
out_good:
ret = true;
-out_bad:
+ key_control->thoff = (u16)nhoff;
+out:
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
- key_control->thoff = (u16)nhoff;
return ret;
+
+out_bad:
+ ret = false;
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
+ goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
@@ -874,8 +947,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, ports),
},
{
- .key_id = FLOW_DISSECTOR_KEY_VLANID,
- .offset = offsetof(struct flow_keys, tags),
+ .key_id = FLOW_DISSECTOR_KEY_VLAN,
+ .offset = offsetof(struct flow_keys, vlan),
},
{
.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 669ecc9f884e..e5f84c26ba1a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -251,6 +251,41 @@ drop:
}
EXPORT_SYMBOL(lwtunnel_output);
+int lwtunnel_xmit(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
+ int ret = -EINVAL;
+
+ if (!dst)
+ goto drop;
+
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->xmit))
+ ret = ops->xmit(skb);
+ rcu_read_unlock();
+
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+ return ret;
+
+drop:
+ kfree_skb(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_xmit);
+
int lwtunnel_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cf26e04c4046..2ae929f9bd06 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
} else
goto out;
} else {
- if (lladdr == neigh->ha && new == NUD_STALE)
+ if (lladdr == neigh->ha && new == NUD_STALE &&
+ !(flags & NEIGH_UPDATE_F_ADMIN))
new = old;
}
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..7001da910c6b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -37,6 +37,8 @@ struct net init_net = {
};
EXPORT_SYMBOL(init_net);
+static bool init_net_initialized;
+
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -217,6 +219,8 @@ int peernet2id_alloc(struct net *net, struct net *peer)
bool alloc;
int id;
+ if (atomic_read(&net->count) == 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
spin_lock_irqsave(&net->nsid_lock, flags);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
@@ -225,7 +229,6 @@ int peernet2id_alloc(struct net *net, struct net *peer)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
-EXPORT_SYMBOL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
@@ -238,6 +241,7 @@ int peernet2id(struct net *net, struct net *peer)
spin_unlock_irqrestore(&net->nsid_lock, flags);
return id;
}
+EXPORT_SYMBOL(peernet2id);
/* This function returns true is the peer netns has an id assigned into the
* current netns.
@@ -310,6 +314,16 @@ out_undo:
#ifdef CONFIG_NET_NS
+static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
+}
+
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;
@@ -351,19 +365,27 @@ void net_drop_ns(void *p)
struct net *copy_net_ns(unsigned long flags,
struct user_namespace *user_ns, struct net *old_net)
{
+ struct ucounts *ucounts;
struct net *net;
int rv;
if (!(flags & CLONE_NEWNET))
return get_net(old_net);
+ ucounts = inc_net_namespaces(user_ns);
+ if (!ucounts)
+ return ERR_PTR(-ENOSPC);
+
net = net_alloc();
- if (!net)
+ if (!net) {
+ dec_net_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
+ }
get_user_ns(user_ns);
mutex_lock(&net_mutex);
+ net->ucounts = ucounts;
rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
@@ -372,6 +394,7 @@ struct net *copy_net_ns(unsigned long flags,
}
mutex_unlock(&net_mutex);
if (rv < 0) {
+ dec_net_namespaces(ucounts);
put_user_ns(user_ns);
net_drop_ns(net);
return ERR_PTR(rv);
@@ -444,6 +467,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ dec_net_namespaces(net->ucounts);
put_user_ns(net->user_ns);
net_drop_ns(net);
}
@@ -531,7 +555,7 @@ static struct pernet_operations __net_initdata net_ns_ops = {
.exit = net_ns_net_exit,
};
-static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_NONE] = { .type = NLA_UNSPEC },
[NETNSA_NSID] = { .type = NLA_S32 },
[NETNSA_PID] = { .type = NLA_U32 },
@@ -750,6 +774,8 @@ static int __init net_ns_init(void)
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
+ init_net_initialized = true;
+
rtnl_lock();
list_add_tail_rcu(&init_net.list, &net_namespace_list);
rtnl_unlock();
@@ -811,15 +837,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
+ if (!init_net_initialized) {
+ list_add_tail(&ops->list, list);
+ return 0;
+ }
+
return ops_init(ops, &init_net);
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
- LIST_HEAD(net_exit_list);
- list_add(&init_net.exit_list, &net_exit_list);
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ if (!init_net_initialized) {
+ list_del(&ops->list);
+ } else {
+ LIST_HEAD(net_exit_list);
+ list_add(&init_net.exit_list, &net_exit_list);
+ ops_exit_list(ops, &net_exit_list);
+ ops_free_list(ops, &net_exit_list);
+ }
}
#endif /* CONFIG_NET_NS */
@@ -996,11 +1031,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+ return to_net_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .owner = netns_owner,
};
#endif
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bbd118b19aef..306b8f0e03c1 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -216,8 +216,8 @@
#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */
/* If lock -- protects updating of if_list */
-#define if_lock(t) spin_lock(&(t->if_lock));
-#define if_unlock(t) spin_unlock(&(t->if_lock));
+#define if_lock(t) mutex_lock(&(t->if_lock));
+#define if_unlock(t) mutex_unlock(&(t->if_lock));
/* Used to help with determining the pkts on receive */
#define PKTGEN_MAGIC 0xbe9be955
@@ -423,7 +423,7 @@ struct pktgen_net {
};
struct pktgen_thread {
- spinlock_t if_lock; /* for list of devices */
+ struct mutex if_lock; /* for list of devices */
struct list_head if_list; /* All device here */
struct list_head th_list;
struct task_struct *tsk;
@@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
{
struct pktgen_thread *t;
+ mutex_lock(&pktgen_thread_lock);
+
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
struct pktgen_dev *pkt_dev;
- rcu_read_lock();
- list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
+ if_lock(t);
+ list_for_each_entry(pkt_dev, &t->if_list, list) {
if (pkt_dev->odev != dev)
continue;
@@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
dev->name);
break;
}
- rcu_read_unlock();
+ if_unlock(t);
}
+ mutex_unlock(&pktgen_thread_lock);
}
static int pktgen_device_event(struct notifier_block *unused,
@@ -2286,7 +2289,7 @@ out:
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
{
- pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev);
+ pkt_dev->pkt_overhead = 0;
pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2777,13 +2780,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
}
static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
- struct pktgen_dev *pkt_dev,
- unsigned int extralen)
+ struct pktgen_dev *pkt_dev)
{
+ unsigned int extralen = LL_RESERVED_SPACE(dev);
struct sk_buff *skb = NULL;
- unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
- pkt_dev->pkt_overhead;
+ unsigned int size;
+ size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead;
if (pkt_dev->flags & F_NODE) {
int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
@@ -2796,8 +2799,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
+ /* the caller pre-fetches from skb->data and reserves for the mac hdr */
if (likely(skb))
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, extralen - 16);
return skb;
}
@@ -2830,16 +2834,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mod_cur_headers(pkt_dev);
queue_map = pkt_dev->cur_queue_map;
- datalen = (odev->hard_header_len + 16) & ~0xf;
-
- skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
+ skb = pktgen_alloc_skb(odev, pkt_dev);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
}
prefetchw(skb->data);
- skb_reserve(skb, datalen);
+ skb_reserve(skb, 16);
/* Reserve for ethernet and IP header */
eth = (__u8 *) skb_push(skb, 14);
@@ -2959,7 +2961,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
mod_cur_headers(pkt_dev);
queue_map = pkt_dev->cur_queue_map;
- skb = pktgen_alloc_skb(odev, pkt_dev, 16);
+ skb = pktgen_alloc_skb(odev, pkt_dev);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -3763,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
return -ENOMEM;
}
- spin_lock_init(&t->if_lock);
+ mutex_init(&t->if_lock);
t->cpu = cpu;
INIT_LIST_HEAD(&t->if_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 189cc78c77eb..a99917b5de33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype)
rtnl_msg_handlers[protocol][msgindex].doit = NULL;
rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
return 0;
}
@@ -704,6 +705,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
} else if (i == RTAX_FEATURES - 1) {
u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
+ if (!user_features)
+ continue;
BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
if (nla_put_u32(skb, i + 1, user_features))
goto nla_put_failure;
@@ -837,15 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
(ext_filter_mask & RTEXT_FILTER_VF)) {
int num_vfs = dev_num_vf(dev->dev.parent);
- size_t size = nla_total_size(sizeof(struct nlattr));
- size += nla_total_size(num_vfs * sizeof(struct nlattr));
+ size_t size = nla_total_size(0);
size += num_vfs *
- (nla_total_size(sizeof(struct ifla_vf_mac)) +
+ (nla_total_size(0) +
+ nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
+ nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
+ nla_total_size(MAX_VLAN_LIST_LEN *
+ sizeof(struct ifla_vf_vlan_info)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
nla_total_size(sizeof(struct ifla_vf_link_state)) +
nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+ nla_total_size(0) + /* nest IFLA_VF_STATS */
/* IFLA_VF_STATS_RX_PACKETS */
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_TX_PACKETS */
@@ -893,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev,
static size_t rtnl_xdp_size(const struct net_device *dev)
{
- size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */
+ size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
+ nla_total_size(1); /* XDP_ATTACHED */
if (!dev->netdev_ops->ndo_xdp)
return 0;
@@ -1109,14 +1118,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct nlattr *vfinfo)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct nlattr *vf, *vfstats, *vfvlanlist;
struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_vlan_info vf_vlan_info;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_stats vf_stats;
struct ifla_vf_trust vf_trust;
struct ifla_vf_vlan vf_vlan;
struct ifla_vf_rate vf_rate;
- struct nlattr *vf, *vfstats;
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
@@ -1133,11 +1143,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
ivi.linkstate = 0;
+ /* VLAN Protocol by default is 802.1Q */
+ ivi.vlan_proto = htons(ETH_P_8021Q);
if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
return 0;
+ memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
+
vf_mac.vf =
vf_vlan.vf =
+ vf_vlan_info.vf =
vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
@@ -1148,6 +1163,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
+ vf_vlan_info.vlan = ivi.vlan;
+ vf_vlan_info.qos = ivi.qos;
+ vf_vlan_info.vlan_proto = ivi.vlan_proto;
vf_tx_rate.rate = ivi.max_tx_rate;
vf_rate.min_tx_rate = ivi.min_tx_rate;
vf_rate.max_tx_rate = ivi.max_tx_rate;
@@ -1156,10 +1174,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_rss_query_en.setting = ivi.rss_query_en;
vf_trust.setting = ivi.trusted;
vf = nla_nest_start(skb, IFLA_VF_INFO);
- if (!vf) {
- nla_nest_cancel(skb, vfinfo);
- return -EMSGSIZE;
- }
+ if (!vf)
+ goto nla_put_vfinfo_failure;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
@@ -1175,17 +1191,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
&vf_rss_query_en) ||
nla_put(skb, IFLA_VF_TRUST,
sizeof(vf_trust), &vf_trust))
- return -EMSGSIZE;
+ goto nla_put_vf_failure;
+ vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
+ if (!vfvlanlist)
+ goto nla_put_vf_failure;
+ if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
+ &vf_vlan_info)) {
+ nla_nest_cancel(skb, vfvlanlist);
+ goto nla_put_vf_failure;
+ }
+ nla_nest_end(skb, vfvlanlist);
memset(&vf_stats, 0, sizeof(vf_stats));
if (dev->netdev_ops->ndo_get_vf_stats)
dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
&vf_stats);
vfstats = nla_nest_start(skb, IFLA_VF_STATS);
- if (!vfstats) {
- nla_nest_cancel(skb, vf);
- nla_nest_cancel(skb, vfinfo);
- return -EMSGSIZE;
- }
+ if (!vfstats)
+ goto nla_put_vf_failure;
if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
@@ -1197,11 +1219,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD))
- return -EMSGSIZE;
+ vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+ nla_nest_cancel(skb, vfstats);
+ goto nla_put_vf_failure;
+ }
nla_nest_end(skb, vfstats);
nla_nest_end(skb, vf);
return 0;
+
+nla_put_vf_failure:
+ nla_nest_cancel(skb, vf);
+nla_put_vfinfo_failure:
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
}
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
@@ -1446,6 +1476,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
+ [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED },
[IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) },
[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
@@ -1578,7 +1609,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
head = &net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
- continue;
+ goto cont;
if (idx < s_idx)
goto cont;
err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1702,7 +1733,37 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_vlan)
err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
- ivv->qos);
+ ivv->qos,
+ htons(ETH_P_8021Q));
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_VLAN_LIST]) {
+ struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
+ struct nlattr *attr;
+ int rem, len = 0;
+
+ err = -EOPNOTSUPP;
+ if (!ops->ndo_set_vf_vlan)
+ return err;
+
+ nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
+ if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
+ nla_len(attr) < NLA_HDRLEN) {
+ return -EINVAL;
+ }
+ if (len >= MAX_VLAN_LIST_LEN)
+ return -EOPNOTSUPP;
+ ivvl[len] = nla_data(attr);
+
+ len++;
+ }
+ if (len == 0)
+ return -EINVAL;
+
+ err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
+ ivvl[0]->qos, ivvl[0]->vlan_proto);
if (err < 0)
return err;
}
@@ -2791,7 +2852,10 @@ nla_put_failure:
static inline size_t rtnl_fdb_nlmsg_size(void)
{
- return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
+ return NLMSG_ALIGN(sizeof(struct ndmsg)) +
+ nla_total_size(ETH_ALEN) + /* NDA_LLADDR */
+ nla_total_size(sizeof(u16)) + /* NDA_VLAN */
+ 0;
}
static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
@@ -3066,7 +3130,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
- if (*idx < cb->args[0])
+ if (*idx < cb->args[2])
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
@@ -3093,19 +3157,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev,
- int idx)
+ int *idx)
{
int err;
netif_addr_lock_bh(dev);
- err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc);
+ err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
if (err)
goto out;
- nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
+ nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
out:
netif_addr_unlock_bh(dev);
- cb->args[1] = err;
- return idx;
+ return err;
}
EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -3118,9 +3181,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
const struct net_device_ops *cops = NULL;
struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
struct net *net = sock_net(skb->sk);
+ struct hlist_head *head;
int brport_idx = 0;
int br_idx = 0;
- int idx = 0;
+ int h, s_h;
+ int idx = 0, s_idx;
+ int err = 0;
+ int fidx = 0;
if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
ifla_policy) == 0) {
@@ -3138,49 +3205,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
ops = br_dev->netdev_ops;
}
- cb->args[1] = 0;
- for_each_netdev(net, dev) {
- if (brport_idx && (dev->ifindex != brport_idx))
- continue;
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
- if (!br_idx) { /* user did not specify a specific bridge */
- if (dev->priv_flags & IFF_BRIDGE_PORT) {
- br_dev = netdev_master_upper_dev_get(dev);
- cops = br_dev->netdev_ops;
- }
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry(dev, head, index_hlist) {
- } else {
- if (dev != br_dev &&
- !(dev->priv_flags & IFF_BRIDGE_PORT))
+ if (brport_idx && (dev->ifindex != brport_idx))
continue;
- if (br_dev != netdev_master_upper_dev_get(dev) &&
- !(dev->priv_flags & IFF_EBRIDGE))
- continue;
+ if (!br_idx) { /* user did not specify a specific bridge */
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ br_dev = netdev_master_upper_dev_get(dev);
+ cops = br_dev->netdev_ops;
+ }
+ } else {
+ if (dev != br_dev &&
+ !(dev->priv_flags & IFF_BRIDGE_PORT))
+ continue;
- cops = ops;
- }
+ if (br_dev != netdev_master_upper_dev_get(dev) &&
+ !(dev->priv_flags & IFF_EBRIDGE))
+ continue;
+ cops = ops;
+ }
- if (dev->priv_flags & IFF_BRIDGE_PORT) {
- if (cops && cops->ndo_fdb_dump)
- idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
- idx);
- }
- if (cb->args[1] == -EMSGSIZE)
- break;
+ if (idx < s_idx)
+ goto cont;
- if (dev->netdev_ops->ndo_fdb_dump)
- idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
- idx);
- else
- idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
- if (cb->args[1] == -EMSGSIZE)
- break;
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ if (cops && cops->ndo_fdb_dump) {
+ err = cops->ndo_fdb_dump(skb, cb,
+ br_dev, dev,
+ &fidx);
+ if (err == -EMSGSIZE)
+ goto out;
+ }
+ }
- cops = NULL;
+ if (dev->netdev_ops->ndo_fdb_dump)
+ err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
+ dev, NULL,
+ &fidx);
+ else
+ err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
+ &fidx);
+ if (err == -EMSGSIZE)
+ goto out;
+
+ cops = NULL;
+
+ /* reset fdb offset to 0 for rest of the interfaces */
+ cb->args[2] = 0;
+ fidx = 0;
+cont:
+ idx++;
+ }
}
- cb->args[0] = idx;
+out:
+ cb->args[0] = h;
+ cb->args[1] = idx;
+ cb->args[2] = fidx;
+
return skb->len;
}
@@ -3550,6 +3639,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
(!idxattr || idxattr == attrid);
}
+#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
+static int rtnl_get_offload_stats_attr_size(int attr_id)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return sizeof(struct rtnl_link_stats64);
+ }
+
+ return 0;
+}
+
+static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
+ int *prividx)
+{
+ struct nlattr *attr = NULL;
+ int attr_id, size;
+ void *attr_data;
+ int err;
+
+ if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+ dev->netdev_ops->ndo_get_offload_stats))
+ return -ENODATA;
+
+ for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+ attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+ if (attr_id < *prividx)
+ continue;
+
+ size = rtnl_get_offload_stats_attr_size(attr_id);
+ if (!size)
+ continue;
+
+ if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+ continue;
+
+ attr = nla_reserve_64bit(skb, attr_id, size,
+ IFLA_OFFLOAD_XSTATS_UNSPEC);
+ if (!attr)
+ goto nla_put_failure;
+
+ attr_data = nla_data(attr);
+ memset(attr_data, 0, size);
+ err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
+ attr_data);
+ if (err)
+ goto get_offload_stats_failure;
+ }
+
+ if (!attr)
+ return -ENODATA;
+
+ *prividx = 0;
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+get_offload_stats_failure:
+ *prividx = attr_id;
+ return err;
+}
+
+static int rtnl_get_offload_stats_size(const struct net_device *dev)
+{
+ int nla_size = 0;
+ int attr_id;
+ int size;
+
+ if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+ dev->netdev_ops->ndo_get_offload_stats))
+ return 0;
+
+ for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+ attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+ if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+ continue;
+ size = rtnl_get_offload_stats_attr_size(attr_id);
+ nla_size += nla_total_size_64bit(size);
+ }
+
+ if (nla_size != 0)
+ nla_size += nla_total_size(0);
+
+ return nla_size;
+}
+
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, unsigned int filter_mask,
@@ -3559,6 +3733,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
struct nlmsghdr *nlh;
struct nlattr *attr;
int s_prividx = *prividx;
+ int err;
ASSERT_RTNL();
@@ -3587,8 +3762,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
if (ops && ops->fill_linkxstats) {
- int err;
-
*idxattr = IFLA_STATS_LINK_XSTATS;
attr = nla_nest_start(skb,
IFLA_STATS_LINK_XSTATS);
@@ -3612,8 +3785,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (master)
ops = master->rtnl_link_ops;
if (ops && ops->fill_linkxstats) {
- int err;
-
*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
attr = nla_nest_start(skb,
IFLA_STATS_LINK_XSTATS_SLAVE);
@@ -3628,6 +3799,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
}
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ *idxattr)) {
+ *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
+ attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ if (!attr)
+ goto nla_put_failure;
+
+ err = rtnl_get_offload_stats(skb, dev, prividx);
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, attr);
+ else
+ nla_nest_end(skb, attr);
+
+ if (err && err != -ENODATA)
+ goto nla_put_failure;
+ *idxattr = 0;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -3642,10 +3831,6 @@ nla_put_failure:
return -EMSGSIZE;
}
-static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
- [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
-};
-
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
@@ -3685,6 +3870,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
}
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
+ size += rtnl_get_offload_stats_size(dev);
+
return size;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3864b4b68fa1..1e3e0087245b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}
-ssize_t skb_socket_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
-
- /* Drop the socket lock, otherwise we have reverse
- * locking dependencies between sk_lock and i_mutex
- * here as compared to sendfile(). We enter here
- * with the socket lock held, and splice_to_pipe() will
- * grab the pipe inode lock. For sendfile() emulation,
- * we call into ->sendpage() with the i_mutex lock held
- * and networking will grab the socket lock.
- */
- release_sock(sk);
- ret = splice_to_pipe(pipe, spd);
- lock_sock(sk);
-
- return ret;
-}
-
/*
* Map data from the skb to a pipe. Should handle both the linear part,
* the fragments, and the frag list.
*/
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
- unsigned int flags,
- ssize_t (*splice_cb)(struct sock *,
- struct pipe_inode_info *,
- struct splice_pipe_desc *))
+ unsigned int flags)
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
if (spd.nr_pages)
- ret = splice_cb(sk, pipe, &spd);
+ ret = splice_to_pipe(pipe, &spd);
return ret;
}
@@ -2445,6 +2421,25 @@ void skb_queue_purge(struct sk_buff_head *list)
EXPORT_SYMBOL(skb_queue_purge);
/**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+void skb_rbtree_purge(struct rb_root *root)
+{
+ struct sk_buff *skb, *next;
+
+ rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
+ kfree_skb(skb);
+
+ *root = RB_ROOT;
+}
+
+/**
* skb_queue_head - queue a buffer at the list head
* @list: list to use
* @newsk: buffer to queue
@@ -3078,11 +3073,31 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
sg = !!(features & NETIF_F_SG);
csum = !!can_checksum_protocol(features, proto);
- /* GSO partial only requires that we trim off any excess that
- * doesn't fit into an MSS sized block, so take care of that
- * now.
- */
- if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
+ if (sg && csum && (mss != GSO_BY_FRAGS)) {
+ if (!(features & NETIF_F_GSO_PARTIAL)) {
+ struct sk_buff *iter;
+
+ if (!list_skb ||
+ !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
+ goto normal;
+
+ /* Split the buffer at the frag_list pointer.
+ * This is based on the assumption that all
+ * buffers in the chain excluding the last
+ * containing the same amount of data.
+ */
+ skb_walk_frags(head_skb, iter) {
+ if (skb_headlen(iter))
+ goto normal;
+
+ len -= iter->len;
+ }
+ }
+
+ /* GSO partial only requires that we trim off any excess that
+ * doesn't fit into an MSS sized block, so take care of that
+ * now.
+ */
partial_segs = len / mss;
if (partial_segs > 1)
mss *= partial_segs;
@@ -3090,6 +3105,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
partial_segs = 0;
}
+normal:
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -3281,21 +3297,29 @@ perform_csum_check:
*/
segs->prev = tail;
- /* Update GSO info on first skb in partial sequence. */
if (partial_segs) {
+ struct sk_buff *iter;
int type = skb_shinfo(head_skb)->gso_type;
+ unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
/* Update type to add partial and then remove dodgy if set */
- type |= SKB_GSO_PARTIAL;
+ type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
type &= ~SKB_GSO_DODGY;
/* Update GSO info and prepare to start updating headers on
* our way back down the stack of protocols.
*/
- skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size;
- skb_shinfo(segs)->gso_segs = partial_segs;
- skb_shinfo(segs)->gso_type = type;
- SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset;
+ for (iter = segs; iter; iter = iter->next) {
+ skb_shinfo(iter)->gso_size = gso_size;
+ skb_shinfo(iter)->gso_segs = partial_segs;
+ skb_shinfo(iter)->gso_type = type;
+ SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset;
+ }
+
+ if (tail->len - doffset <= gso_size)
+ skb_shinfo(tail)->gso_size = 0;
+ else if (tail != segs)
+ skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
}
/* Following permits correct backpressure, for protocols
@@ -4474,17 +4498,24 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len)
}
EXPORT_SYMBOL(skb_ensure_writable);
-/* remove VLAN header from packet and update csum accordingly. */
-static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
+/* remove VLAN header from packet and update csum accordingly.
+ * expects a non skb_vlan_tag_present skb with a vlan tag payload
+ */
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
struct vlan_hdr *vhdr;
- unsigned int offset = skb->data - skb_mac_header(skb);
+ int offset = skb->data - skb_mac_header(skb);
int err;
- __skb_push(skb, offset);
+ if (WARN_ONCE(offset,
+ "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
+ offset)) {
+ return -EINVAL;
+ }
+
err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
if (unlikely(err))
- goto pull;
+ return err;
skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
@@ -4501,12 +4532,14 @@ static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
-pull:
- __skb_pull(skb, offset);
return err;
}
+EXPORT_SYMBOL(__skb_vlan_pop);
+/* Pop a vlan tag either from hwaccel or from payload.
+ * Expects skb->data at mac header.
+ */
int skb_vlan_pop(struct sk_buff *skb)
{
u16 vlan_tci;
@@ -4516,9 +4549,7 @@ int skb_vlan_pop(struct sk_buff *skb)
if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
- if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (unlikely(!eth_type_vlan(skb->protocol)))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4526,9 +4557,7 @@ int skb_vlan_pop(struct sk_buff *skb)
return err;
}
/* move next vlan tag to hw accel tag */
- if (likely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (likely(!eth_type_vlan(skb->protocol)))
return 0;
vlan_proto = skb->protocol;
@@ -4541,29 +4570,30 @@ int skb_vlan_pop(struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_vlan_pop);
+/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
+ * Expects skb->data at mac header.
+ */
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
if (skb_vlan_tag_present(skb)) {
- unsigned int offset = skb->data - skb_mac_header(skb);
+ int offset = skb->data - skb_mac_header(skb);
int err;
- /* __vlan_insert_tag expect skb->data pointing to mac header.
- * So change skb->data before calling it and change back to
- * original position later
- */
- __skb_push(skb, offset);
+ if (WARN_ONCE(offset,
+ "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
+ offset)) {
+ return -EINVAL;
+ }
+
err = __vlan_insert_tag(skb, skb->vlan_proto,
skb_vlan_tag_get(skb));
- if (err) {
- __skb_pull(skb, offset);
+ if (err)
return err;
- }
skb->protocol = skb->vlan_proto;
skb->mac_len += VLAN_HLEN;
skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
- __skb_pull(skb, offset);
}
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b60223..5e3ca414357e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(sock_queue_rcv_skb);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
- const int nested, unsigned int trim_cap)
+ const int nested, unsigned int trim_cap, bool refcounted)
{
int rc = NET_RX_SUCCESS;
@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
bh_unlock_sock(sk);
out:
- sock_put(sk);
+ if (refcounted)
+ sock_put(sk);
return rc;
discard_and_relse:
kfree_skb(skb);
@@ -1315,24 +1316,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
}
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
-{
- unsigned long nulls1, nulls2;
-
- nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
- nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
- if (nulls1 > nulls2)
- swap(nulls1, nulls2);
-
- if (nulls1 != 0)
- memset((char *)sk, 0, nulls1);
- memset((char *)sk + nulls1 + sizeof(void *), 0,
- nulls2 - nulls1 - sizeof(void *));
- memset((char *)sk + nulls2 + sizeof(void *), 0,
- size - nulls2 - sizeof(void *));
-}
-EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
-
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
@@ -1344,12 +1327,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
- if (priority & __GFP_ZERO) {
- if (prot->clear_sk)
- prot->clear_sk(sk, prot->obj_size);
- else
- sk_prot_clear_nulls(sk, prot->obj_size);
- }
+ if (priority & __GFP_ZERO)
+ sk_prot_clear_nulls(sk, prot->obj_size);
} else
sk = kmalloc(prot->obj_size, priority);
@@ -1362,7 +1341,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!try_module_get(prot->owner))
goto out_free_sec;
sk_tx_queue_clear(sk);
- cgroup_sk_alloc(&sk->sk_cgrp_data);
}
return sk;
@@ -1386,6 +1364,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
slab = prot->slab;
cgroup_sk_free(&sk->sk_cgrp_data);
+ mem_cgroup_sk_free(sk);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -1422,6 +1401,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
+ mem_cgroup_sk_alloc(sk);
+ cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
}
@@ -1563,9 +1544,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
newsk->sk_err = 0;
+ newsk->sk_err_soft = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+
+ mem_cgroup_sk_alloc(newsk);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1588,9 +1574,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sk_set_socket(newsk, NULL);
newsk->sk_wq = NULL;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- sock_update_memcg(newsk);
-
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index e92b759d906c..9a1a352fd1eb 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
return 0;
}
-EXPORT_SYMBOL(reuseport_add_sock);
static void reuseport_free_rcu(struct rcu_head *head)
{
diff --git a/net/core/stream.c b/net/core/stream.c
index 159516a11b7e..1086c8b280a8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk)
rcu_read_unlock();
}
}
-EXPORT_SYMBOL(sk_stream_write_space);
/**
* sk_stream_wait_connect - Wait for a socket to get into the connected state