diff options
21 files changed, 1816 insertions, 1121 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 7d69a3061f17..4256d59eca2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -78,9 +78,24 @@ config MLX5_ESWITCH Legacy SRIOV mode (L2 mac vlan steering based). Switchdev mode (eswitch offloads). +config MLX5_CLS_ACT + bool "MLX5 TC classifier action support" + depends on MLX5_ESWITCH && NET_CLS_ACT + default y + help + mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT), + works in both native NIC mdoe and Switchdev SRIOV mode. + Actions get attached to a Hardware offloaded classifiers and are + invoked after a successful classification. Actions are used to + overwrite the classification result, instantly drop or redirect and/or + reformat packets in wire speeds without involving the host cpu. + + If set to N, TC offloads in both NIC and switchdev modes will be disabled. + If unsure, set to Y + config MLX5_TC_CT bool "MLX5 TC connection tracking offload support" - depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT default y help Say Y here if you want to support offloading connection tracking rules diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d3c7dbd7f1d5..e5ee9103fefb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -33,17 +33,19 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ - en/tc_tun_geneve.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ + en/mapping.o esw/chains.o en/tc_tun.o \ + en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ + en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o esw/chains.o + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c new file mode 100644 index 000000000000..baa162432e75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include <linux/refcount.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/rwlock.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include "neigh.h" +#include "tc.h" +#include "en_rep.h" +#include "fs_core.h" +#include "diag/en_rep_tracepoint.h" + +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? + list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!mlx5e_encap_take(e)) + continue; + + priv = netdev_priv(e->out_dev); + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + mlx5e_encap_put(priv, e); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe, + struct neighbour *n) +{ + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } +} + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) + return NOTIFY_DONE; + + mlx5e_rep_queue_neigh_update_work(priv, nhe, n); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + rcu_read_unlock(); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + mutex_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + mutex_destroy(&neigh_update->encap_lock); + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; + + mutex_lock(&rpriv->neigh_update.encap_lock); + + list_del_rcu(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + mutex_unlock(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. + */ +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; + + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; +} + +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + (*nhe)->priv = priv; + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + spin_lock_init(&(*nhe)->encap_list_lock); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h new file mode 100644 index 000000000000..32b239189c95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_NEIGH__ +#define __MLX5_EN_REP_NEIGH__ + +#include "en.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv); + +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe); +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline int +mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_NEIGH__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c new file mode 100644 index 000000000000..c609a5e50ebc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -0,0 +1,711 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include <net/dst_metadata.h> +#include <linux/netdevice.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include "tc.h" +#include "neigh.h" +#include "en_rep.h" +#include "eswitch.h" +#include "esw/chains.h" +#include "en/tc_ct.h" +#include "en/mapping.h" +#include "en/tc_tun.h" +#include "lib/port_tun.h" + +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + + struct list_head list; +}; + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + struct mlx5e_neigh_hash_entry *nhe; + int err; + + err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); + if (err) + return err; + + mutex_lock(&rpriv->neigh_update.encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) { + mutex_unlock(&rpriv->neigh_update.encap_lock); + mlx5_tun_entropy_refcount_dec(tun_entropy, + e->reformat_type); + return err; + } + } + + e->nhe = nhe; + spin_lock(&nhe->encap_list_lock); + list_add_rcu(&e->encap_list, &nhe->encap_list); + spin_unlock(&nhe->encap_list_lock); + + mutex_unlock(&rpriv->neigh_update.encap_lock); + + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + + if (!e->nhe) + return; + + spin_lock(&e->nhe->encap_list_lock); + list_del_rcu(&e->encap_list); + spin_unlock(&e->nhe->encap_list_lock); + + mlx5e_rep_neigh_entry_release(e->nhe); + e->nhe = NULL; + mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); +} + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool encap_connected; + LIST_HEAD(flow_list); + + ASSERT_RTNL(); + + /* wait for encap to be fully initialized */ + wait_for_completion(&e->res_ready); + + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + if (e->compl_result < 0 || (encap_connected == neigh_connected && + ether_addr_equal(e->h_dest, ha))) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); + + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && + (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) + mlx5e_tc_encap_flows_del(priv, e, &flow_list); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr); + + mlx5e_tc_encap_flows_add(priv, e, &flow_list); + } +unlock: + mutex_unlock(&esw->offloads.encap_tbl_lock); + mlx5e_put_encap_flow_list(priv, &flow_list); +} + +static int +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, int flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; + } +} + +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_priv *priv = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload tmp, *f = type_data; + struct mlx5e_priv *priv = cb_priv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + flags = MLX5_TC_FLAG(INGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + esw = priv->mdev->priv.eswitch; + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_esw_chains_prios_supported(esw)) + return -EOPNOTSUPP; + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_rep_block_tc_cb_list); +static LIST_HEAD(mlx5e_rep_block_ft_cb_list); +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_tc_cb_list, + mlx5e_rep_setup_tc_cb, + priv, priv, true); + case TC_SETUP_FT: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_ft_cb_list, + mlx5e_rep_setup_ft_cb, + priv, priv, true); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + int err; + + mutex_init(&uplink_priv->unready_flows_lock); + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + return err; +} + +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) +{ + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); +} + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, + mlx5e_tc_reoffload_flows_work); +} + +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); +} + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); + + return NOTIFY_OK; +} + +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + /* All callback list access should be protected by RTNL. */ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev); + +void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_rep_indr_block_priv *cb_priv, *temp; + struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; + + list_for_each_entry_safe(cb_priv, temp, head, list) { + mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); + kfree(cb_priv); + } +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct flow_cls_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int err = 0; + + switch (flower->command) { + case FLOW_CLS_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_rep_indr_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static LIST_HEAD(mlx5e_block_cb_list); + +static int +mlx5e_rep_indr_setup_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + f->unlocked_driver_cb = true; + f->driver_block_list = &mlx5e_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind); + if (IS_ERR(block_cb)) { + list_del(&indr_priv->list); + kfree(indr_priv); + return PTR_ERR(block_cb); + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); + + return 0; + case FLOW_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (!indr_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static +int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + int err; + + err = __flow_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_cb, + rpriv); + if (err) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", + netdev_name(netdev), err); + } + return err; +} + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, + rpriv); +} + +static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + uplink_priv.netdevice_nb); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && + !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) + return NOTIFY_OK; + + switch (event) { + case NETDEV_REGISTER: + mlx5e_rep_indr_register_block(rpriv, netdev); + break; + case NETDEV_UNREGISTER: + mlx5e_rep_indr_unregister_block(rpriv, netdev); + break; + } + return NOTIFY_OK; +} + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + int err; + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); + return err; +} + +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + + /* clean indirect TC block notifications */ + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); +} + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct tunnel_match_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.key.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.key.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + enc_opts.key.data, + enc_opts.key.len, + enc_opts.key.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + +bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int tunnel_moffset; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->ft_metadata); + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + tc_skb_ext->chain = chain; + + tuple_id = reg_c1 & TUPLE_ID_MAX; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + return false; + } + + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} + +void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h new file mode 100644 index 000000000000..86f92abf2fdd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_TC_H__ +#define __MLX5_EN_REP_TC_H__ + +#include <linux/skbuff.h> +#include "en_tc.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv); + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv); + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv); +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv); + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv); + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); +void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv); + +bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv); +void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); + +#else /* CONFIG_MLX5_CLS_ACT */ + +struct mlx5e_rep_priv; +static inline int +mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {} + +static inline int +mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {} + +static inline void +mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {} +static inline void +mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {} + +static inline int +mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; } + +static inline int +mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { return -EOPNOTSUPP; } + +static inline void +mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) {} + +struct mlx5e_tc_update_priv; +static inline bool +mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) { return true; } +static inline void +mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index b45c3f46570b..e99382f58807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -4,8 +4,11 @@ #include <net/vxlan.h> #include <net/gre.h> #include <net/geneve.h> +#include <net/bareudp.h> #include "en/tc_tun.h" #include "en_tc.h" +#include "rep/tc.h" +#include "rep/neigh.h" struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { @@ -16,6 +19,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) else if (netif_is_gretap(tunnel_dev) || netif_is_ip6gretap(tunnel_dev)) return &gre_tunnel; + else if (netif_is_bareudp(tunnel_dev)) + return &mplsoudp_tunnel; else return NULL; } @@ -96,9 +101,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, } rt = ip_route_output_key(dev_net(mirred_dev), fl4); - ret = PTR_ERR_OR_ZERO(rt); - if (ret) - return ret; + if (IS_ERR(rt)) + return PTR_ERR(rt); if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { ip_rt_put(rt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 1630f0ec3ad7..704359df6095 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -16,6 +16,7 @@ enum { MLX5E_TC_TUNNEL_TYPE_VXLAN, MLX5E_TC_TUNNEL_TYPE_GENEVE, MLX5E_TC_TUNNEL_TYPE_GRETAP, + MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, }; struct mlx5e_tc_tunnel { @@ -46,6 +47,7 @@ struct mlx5e_tc_tunnel { extern struct mlx5e_tc_tunnel vxlan_tunnel; extern struct mlx5e_tc_tunnel geneve_tunnel; extern struct mlx5e_tc_tunnel gre_tunnel; +extern struct mlx5e_tc_tunnel mplsoudp_tunnel; struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c new file mode 100644 index 000000000000..98ee62e427d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/bareudp.h> +#include <net/mpls.h> +#include "en/tc_tun.h" + +static bool can_offload(struct mlx5e_priv *priv) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2); +} + +static int calc_hlen(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + MPLS_HLEN; +} + +static int init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &mplsoudp_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + return 0; +} + +static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl) +{ + u32 res; + + /* mpls label is 32 bits long and construction as follows: + * 20 bits label + * 3 bits tos + * 1 bit bottom of stack. Since we support only one label, this bit is + * always set. + * 8 bits TTL + */ + res = be32_to_cpu(label) << 12 | 1 << 8 | (tos & 7) << 9 | ttl; + return cpu_to_be32(res); +} + +static int generate_ip_tun_hdr(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *r) +{ + const struct ip_tunnel_key *tun_key = &r->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct mpls_shim_hdr *mpls; + + mpls = (struct mpls_shim_hdr *)(udp + 1); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl); + + return 0; +} + +static int parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); +} + +static int parse_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_enc_keyid enc_keyid; + struct flow_match_mpls match; + void *misc2_c; + void *misc2_v; + + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_UDP)) + return -EOPNOTSUPP; + + flow_rule_match_mpls(rule, &match); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_label, match.mask->mpls_label); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_label, match.key->mpls_label); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_exp, match.mask->mpls_tc); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_exp, match.key->mpls_tc); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_s_bos, match.mask->mpls_bos); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_s_bos, match.key->mpls_bos); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_ttl, match.mask->mpls_ttl); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_ttl, match.key->mpls_ttl); + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + return 0; +} + +struct mlx5e_tc_tunnel mplsoudp_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, + .match_level = MLX5_MATCH_L4, + .can_offload = can_offload, + .calc_hlen = calc_hlen, + .init_encap_attr = init_encap_attr, + .generate_ip_tun_hdr = generate_ip_tun_hdr, + .parse_udp_ports = parse_udp_ports, + .parse_tunnel = parse_tunnel, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8b86c6ded302..8ff9cbe6943d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3520,41 +3520,6 @@ out: return err; } -#ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct flow_cls_offload *cls_flower, - unsigned long flags) -{ - switch (cls_flower->command) { - case FLOW_CLS_REPLACE: - return mlx5e_configure_flower(priv->netdev, priv, cls_flower, - flags); - case FLOW_CLS_DESTROY: - return mlx5e_delete_flower(priv->netdev, priv, cls_flower, - flags); - case FLOW_CLS_STATS: - return mlx5e_stats_flower(priv->netdev, priv, cls_flower, - flags); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, flags); - default: - return -EOPNOTSUPP; - } -} -#endif - static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -3563,7 +3528,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, struct mlx5e_priv *priv = netdev_priv(dev); switch (type) { -#ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@ -3573,7 +3537,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, mlx5e_setup_tc_block_cb, priv, priv, true); } -#endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); default: @@ -3746,7 +3709,7 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } -#ifdef CONFIG_MLX5_ESWITCH +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3857,7 +3820,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); -#ifdef CONFIG_MLX5_ESWITCH +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); #endif err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 52351c105627..a46405c6d560 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -35,7 +35,6 @@ #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/act_api.h> -#include <net/netevent.h> #include <net/arp.h> #include <net/devlink.h> #include <net/ipv6_stubs.h> @@ -45,9 +44,9 @@ #include "en.h" #include "en_rep.h" #include "en_tc.h" -#include "en/tc_tun.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" #include "fs_core.h" -#include "lib/port_tun.h" #include "lib/mlx5.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" @@ -58,16 +57,6 @@ static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; -struct mlx5e_rep_indr_block_priv { - struct net_device *netdev; - struct mlx5e_rep_priv *rpriv; - - struct list_head list; -}; - -static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev); - static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -485,706 +474,6 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) mlx5e_sqs2vport_stop(esw, rep); } -static unsigned long mlx5e_rep_ipv6_interval(void) -{ - if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) - return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); - - return ~0UL; -} - -static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) -{ - unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); - unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - - rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); - mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); -} - -void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - - mlx5_fc_queue_stats_work(priv->mdev, - &neigh_update->neigh_stats_work, - neigh_update->min_interval); -} - -static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) -{ - return refcount_inc_not_zero(&nhe->refcnt); -} - -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); - -static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) -{ - if (refcount_dec_and_test(&nhe->refcnt)) { - mlx5e_rep_neigh_entry_remove(nhe); - kfree_rcu(nhe, rcu); - } -} - -static struct mlx5e_neigh_hash_entry * -mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, - struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_neigh_hash_entry *next = NULL; - - rcu_read_lock(); - - for (next = nhe ? - list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, - &nhe->neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list) : - list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list); - next; - next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, - &next->neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list)) - if (mlx5e_rep_neigh_entry_hold(next)) - break; - - rcu_read_unlock(); - - if (nhe) - mlx5e_rep_neigh_entry_release(nhe); - - return next; -} - -static void mlx5e_rep_neigh_stats_work(struct work_struct *work) -{ - struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, - neigh_update.neigh_stats_work.work); - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe = NULL; - - rtnl_lock(); - if (!list_empty(&rpriv->neigh_update.neigh_list)) - mlx5e_rep_queue_neigh_stats_work(priv); - - while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) - mlx5e_tc_update_neigh_used_value(nhe); - - rtnl_unlock(); -} - -static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - bool neigh_connected, - unsigned char ha[ETH_ALEN]) -{ - struct ethhdr *eth = (struct ethhdr *)e->encap_header; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - bool encap_connected; - LIST_HEAD(flow_list); - - ASSERT_RTNL(); - - /* wait for encap to be fully initialized */ - wait_for_completion(&e->res_ready); - - mutex_lock(&esw->offloads.encap_tbl_lock); - encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result < 0 || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) - goto unlock; - - mlx5e_take_all_encap_flows(e, &flow_list); - - if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && - (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) - mlx5e_tc_encap_flows_del(priv, e, &flow_list); - - if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { - ether_addr_copy(e->h_dest, ha); - ether_addr_copy(eth->h_dest, ha); - /* Update the encap source mac, in case that we delete - * the flows when encap source mac changed. - */ - ether_addr_copy(eth->h_source, e->route_dev->dev_addr); - - mlx5e_tc_encap_flows_add(priv, e, &flow_list); - } -unlock: - mutex_unlock(&esw->offloads.encap_tbl_lock); - mlx5e_put_encap_flow_list(priv, &flow_list); -} - -static void mlx5e_rep_neigh_update(struct work_struct *work) -{ - struct mlx5e_neigh_hash_entry *nhe = - container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); - struct neighbour *n = nhe->n; - struct mlx5e_encap_entry *e; - unsigned char ha[ETH_ALEN]; - struct mlx5e_priv *priv; - bool neigh_connected; - u8 nud_state, dead; - - rtnl_lock(); - - /* If these parameters are changed after we release the lock, - * we'll receive another event letting us know about it. - * We use this lock to avoid inconsistency between the neigh validity - * and it's hw address. - */ - read_lock_bh(&n->lock); - memcpy(ha, n->ha, ETH_ALEN); - nud_state = n->nud_state; - dead = n->dead; - read_unlock_bh(&n->lock); - - neigh_connected = (nud_state & NUD_VALID) && !dead; - - trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); - - list_for_each_entry(e, &nhe->encap_list, encap_list) { - if (!mlx5e_encap_take(e)) - continue; - - priv = netdev_priv(e->out_dev); - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); - mlx5e_encap_put(priv, e); - } - mlx5e_rep_neigh_entry_release(nhe); - rtnl_unlock(); - neigh_release(n); -} - -static struct mlx5e_rep_indr_block_priv * -mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) -{ - struct mlx5e_rep_indr_block_priv *cb_priv; - - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - - list_for_each_entry(cb_priv, - &rpriv->uplink_priv.tc_indr_block_priv_list, - list) - if (cb_priv->netdev == netdev) - return cb_priv; - - return NULL; -} - -static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_rep_indr_block_priv *cb_priv, *temp; - struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; - - list_for_each_entry_safe(cb_priv, temp, head, list) { - mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); - kfree(cb_priv); - } -} - -static int -mlx5e_rep_indr_offload(struct net_device *netdev, - struct flow_cls_offload *flower, - struct mlx5e_rep_indr_block_priv *indr_priv, - unsigned long flags) -{ - struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); - int err = 0; - - switch (flower->command) { - case FLOW_CLS_REPLACE: - err = mlx5e_configure_flower(netdev, priv, flower, flags); - break; - case FLOW_CLS_DESTROY: - err = mlx5e_delete_flower(netdev, priv, flower, flags); - break; - case FLOW_CLS_STATS: - err = mlx5e_stats_flower(netdev, priv, flower, flags); - break; - default: - err = -EOPNOTSUPP; - } - - return err; -} - -static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, - void *type_data, void *indr_priv) -{ - unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); - struct mlx5e_rep_indr_block_priv *priv = indr_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, - flags); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, - void *type_data, void *indr_priv) -{ - struct mlx5e_rep_indr_block_priv *priv = indr_priv; - struct flow_cls_offload *f = type_data; - struct flow_cls_offload tmp; - struct mlx5e_priv *mpriv; - struct mlx5_eswitch *esw; - unsigned long flags; - int err; - - mpriv = netdev_priv(priv->rpriv->netdev); - esw = mpriv->mdev->priv.eswitch; - - flags = MLX5_TC_FLAG(EGRESS) | - MLX5_TC_FLAG(ESW_OFFLOAD) | - MLX5_TC_FLAG(FT_OFFLOAD); - - switch (type) { - case TC_SETUP_CLSFLOWER: - memcpy(&tmp, f, sizeof(*f)); - - /* Re-use tc offload path by moving the ft flow to the - * reserved ft chain. - * - * FT offload can use prio range [0, INT_MAX], so we normalize - * it to range [1, mlx5_esw_chains_get_prio_range(esw)] - * as with tc, where prio 0 isn't supported. - * - * We only support chain 0 of FT offload. - */ - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || - tmp.common.chain_index) - return -EOPNOTSUPP; - - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); - tmp.common.prio++; - err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); - memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); - return err; - default: - return -EOPNOTSUPP; - } -} - -static void mlx5e_rep_indr_block_unbind(void *cb_priv) -{ - struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; - - list_del(&indr_priv->list); - kfree(indr_priv); -} - -static LIST_HEAD(mlx5e_block_cb_list); - -static int -mlx5e_rep_indr_setup_block(struct net_device *netdev, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f, - flow_setup_cb_t *setup_cb) -{ - struct mlx5e_rep_indr_block_priv *indr_priv; - struct flow_block_cb *block_cb; - - if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - f->unlocked_driver_cb = true; - f->driver_block_list = &mlx5e_block_cb_list; - - switch (f->command) { - case FLOW_BLOCK_BIND: - indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); - if (indr_priv) - return -EEXIST; - - indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); - if (!indr_priv) - return -ENOMEM; - - indr_priv->netdev = netdev; - indr_priv->rpriv = rpriv; - list_add(&indr_priv->list, - &rpriv->uplink_priv.tc_indr_block_priv_list); - - block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, - mlx5e_rep_indr_block_unbind); - if (IS_ERR(block_cb)) { - list_del(&indr_priv->list); - kfree(indr_priv); - return PTR_ERR(block_cb); - } - flow_block_cb_add(block_cb, f); - list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); - - return 0; - case FLOW_BLOCK_UNBIND: - indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); - if (!indr_priv) - return -ENOENT; - - block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); - if (!block_cb) - return -ENOENT; - - flow_block_cb_remove(block_cb, f); - list_del(&block_cb->driver_list); - return 0; - default: - return -EOPNOTSUPP; - } - return 0; -} - -static -int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) -{ - switch (type) { - case TC_SETUP_BLOCK: - return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_tc_cb); - case TC_SETUP_FT: - return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_ft_cb); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) -{ - int err; - - err = __flow_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_cb, - rpriv); - if (err) { - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - - mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", - netdev_name(netdev), err); - } - return err; -} - -static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) -{ - __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, - rpriv); -} - -static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, - uplink_priv.netdevice_nb); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - - if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && - !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) - return NOTIFY_OK; - - switch (event) { - case NETDEV_REGISTER: - mlx5e_rep_indr_register_block(rpriv, netdev); - break; - case NETDEV_UNREGISTER: - mlx5e_rep_indr_unregister_block(rpriv, netdev); - break; - } - return NOTIFY_OK; -} - -static void -mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe, - struct neighbour *n) -{ - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); - - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; - - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); - } -} - -static struct mlx5e_neigh_hash_entry * -mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, - struct mlx5e_neigh *m_neigh); - -static int mlx5e_rep_netevent_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, - neigh_update.netevent_nb); - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe = NULL; - struct mlx5e_neigh m_neigh = {}; - struct neigh_parms *p; - struct neighbour *n; - bool found = false; - - switch (event) { - case NETEVENT_NEIGH_UPDATE: - n = ptr; -#if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) -#else - if (n->tbl != &arp_tbl) -#endif - return NOTIFY_DONE; - - m_neigh.dev = n->dev; - m_neigh.family = n->ops->family; - memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - - rcu_read_lock(); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - rcu_read_unlock(); - if (!nhe) - return NOTIFY_DONE; - - mlx5e_rep_queue_neigh_update_work(priv, nhe, n); - break; - - case NETEVENT_DELAY_PROBE_TIME_UPDATE: - p = ptr; - - /* We check the device is present since we don't care about - * changes in the default table, we only care about changes - * done per device delay prob time parameter. - */ -#if IS_ENABLED(CONFIG_IPV6) - if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) -#else - if (!p->dev || p->tbl != &arp_tbl) -#endif - return NOTIFY_DONE; - - rcu_read_lock(); - list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, - neigh_list) { - if (p->dev == nhe->m_neigh.dev) { - found = true; - break; - } - } - rcu_read_unlock(); - if (!found) - return NOTIFY_DONE; - - neigh_update->min_interval = min_t(unsigned long, - NEIGH_VAR(p, DELAY_PROBE_TIME), - neigh_update->min_interval); - mlx5_fc_update_sampling_interval(priv->mdev, - neigh_update->min_interval); - break; - } - return NOTIFY_DONE; -} - -static const struct rhashtable_params mlx5e_neigh_ht_params = { - .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), - .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), - .key_len = sizeof(struct mlx5e_neigh), - .automatic_shrinking = true, -}; - -static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - int err; - - err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); - if (err) - return err; - - INIT_LIST_HEAD(&neigh_update->neigh_list); - mutex_init(&neigh_update->encap_lock); - INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, - mlx5e_rep_neigh_stats_work); - mlx5e_rep_neigh_update_init_interval(rpriv); - - rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; - err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); - if (err) - goto out_err; - return 0; - -out_err: - rhashtable_destroy(&neigh_update->neigh_ht); - return err; -} - -static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - - unregister_netevent_notifier(&neigh_update->netevent_nb); - - flush_workqueue(priv->wq); /* flush neigh update works */ - - cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); - - mutex_destroy(&neigh_update->encap_lock); - rhashtable_destroy(&neigh_update->neigh_ht); -} - -static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - int err; - - err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, - &nhe->rhash_node, - mlx5e_neigh_ht_params); - if (err) - return err; - - list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); - - return err; -} - -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; - - mutex_lock(&rpriv->neigh_update.encap_lock); - - list_del_rcu(&nhe->neigh_list); - - rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, - &nhe->rhash_node, - mlx5e_neigh_ht_params); - mutex_unlock(&rpriv->neigh_update.encap_lock); -} - -/* This function must only be called under the representor's encap_lock or - * inside rcu read lock section. - */ -static struct mlx5e_neigh_hash_entry * -mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, - struct mlx5e_neigh *m_neigh) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct mlx5e_neigh_hash_entry *nhe; - - nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, - mlx5e_neigh_ht_params); - return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; -} - -static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct mlx5e_neigh_hash_entry **nhe) -{ - int err; - - *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); - if (!*nhe) - return -ENOMEM; - - (*nhe)->priv = priv; - memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); - INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); - spin_lock_init(&(*nhe)->encap_list_lock); - INIT_LIST_HEAD(&(*nhe)->encap_list); - refcount_set(&(*nhe)->refcnt, 1); - - err = mlx5e_rep_neigh_entry_insert(priv, *nhe); - if (err) - goto out_free; - return 0; - -out_free: - kfree(*nhe); - return err; -} - -int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; - struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; - struct mlx5e_neigh_hash_entry *nhe; - int err; - - err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); - if (err) - return err; - - mutex_lock(&rpriv->neigh_update.encap_lock); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); - if (!nhe) { - err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); - if (err) { - mutex_unlock(&rpriv->neigh_update.encap_lock); - mlx5_tun_entropy_refcount_dec(tun_entropy, - e->reformat_type); - return err; - } - } - - e->nhe = nhe; - spin_lock(&nhe->encap_list_lock); - list_add_rcu(&e->encap_list, &nhe->encap_list); - spin_unlock(&nhe->encap_list_lock); - - mutex_unlock(&rpriv->neigh_update.encap_lock); - - return 0; -} - -void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; - struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; - - if (!e->nhe) - return; - - spin_lock(&e->nhe->encap_list_lock); - list_del_rcu(&e->encap_list); - spin_unlock(&e->nhe->encap_list_lock); - - mlx5e_rep_neigh_entry_release(e->nhe); - e->nhe = NULL; - mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); -} - static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1225,129 +514,6 @@ static int mlx5e_rep_close(struct net_device *dev) return ret; } -static int -mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct flow_cls_offload *cls_flower, int flags) -{ - switch (cls_flower->command) { - case FLOW_CLS_REPLACE: - return mlx5e_configure_flower(priv->netdev, priv, cls_flower, - flags); - case FLOW_CLS_DESTROY: - return mlx5e_delete_flower(priv->netdev, priv, cls_flower, - flags); - case FLOW_CLS_STATS: - return mlx5e_stats_flower(priv->netdev, priv, cls_flower, - flags); - default: - return -EOPNOTSUPP; - } -} - -static -int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, - struct tc_cls_matchall_offload *ma) -{ - switch (ma->command) { - case TC_CLSMATCHALL_REPLACE: - return mlx5e_tc_configure_matchall(priv, ma); - case TC_CLSMATCHALL_DESTROY: - return mlx5e_tc_delete_matchall(priv, ma); - case TC_CLSMATCHALL_STATS: - mlx5e_tc_stats_matchall(priv, ma); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); - case TC_SETUP_CLSMATCHALL: - return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct flow_cls_offload tmp, *f = type_data; - struct mlx5e_priv *priv = cb_priv; - struct mlx5_eswitch *esw; - unsigned long flags; - int err; - - flags = MLX5_TC_FLAG(INGRESS) | - MLX5_TC_FLAG(ESW_OFFLOAD) | - MLX5_TC_FLAG(FT_OFFLOAD); - esw = priv->mdev->priv.eswitch; - - switch (type) { - case TC_SETUP_CLSFLOWER: - memcpy(&tmp, f, sizeof(*f)); - - if (!mlx5_esw_chains_prios_supported(esw)) - return -EOPNOTSUPP; - - /* Re-use tc offload path by moving the ft flow to the - * reserved ft chain. - * - * FT offload can use prio range [0, INT_MAX], so we normalize - * it to range [1, mlx5_esw_chains_get_prio_range(esw)] - * as with tc, where prio 0 isn't supported. - * - * We only support chain 0 of FT offload. - */ - if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) - return -EOPNOTSUPP; - if (tmp.common.chain_index != 0) - return -EOPNOTSUPP; - - tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); - tmp.common.prio++; - err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); - memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); - return err; - default: - return -EOPNOTSUPP; - } -} - -static LIST_HEAD(mlx5e_rep_block_tc_cb_list); -static LIST_HEAD(mlx5e_rep_block_ft_cb_list); -static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - struct flow_block_offload *f = type_data; - - f->unlocked_driver_cb = true; - - switch (type) { - case TC_SETUP_BLOCK: - return flow_block_cb_setup_simple(type_data, - &mlx5e_rep_block_tc_cb_list, - mlx5e_rep_setup_tc_cb, - priv, priv, true); - case TC_SETUP_FT: - return flow_block_cb_setup_simple(type_data, - &mlx5e_rep_block_ft_cb_list, - mlx5e_rep_setup_ft_cb, - priv, priv, true); - default: - return -EOPNOTSUPP; - } -} - bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1791,31 +957,23 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) priv = netdev_priv(netdev); uplink_priv = &rpriv->uplink_priv; - mutex_init(&uplink_priv->unready_flows_lock); - INIT_LIST_HEAD(&uplink_priv->unready_flows); - - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_rep_tc_init(rpriv); if (err) return err; mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); - /* init indirect block notifications */ - INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); - uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; - err = register_netdevice_notifier_dev_net(rpriv->netdev, - &uplink_priv->netdevice_nb, - &uplink_priv->netdevice_nn); + err = mlx5e_rep_tc_netdevice_event_register(rpriv); if (err) { - mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); - goto tc_esw_cleanup; + mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n", + err); + goto tc_rep_cleanup; } return 0; -tc_esw_cleanup: - mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); +tc_rep_cleanup: + mlx5e_rep_tc_cleanup(rpriv); return err; } @@ -1845,17 +1003,10 @@ destroy_tises: static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) { - struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; - - /* clean indirect TC block notifications */ - unregister_netdevice_notifier_dev_net(rpriv->netdev, - &uplink_priv->netdevice_nb, - &uplink_priv->netdevice_nn); + mlx5e_rep_tc_netdevice_event_unregister(rpriv); mlx5e_rep_indr_clean_block_privs(rpriv); - /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); - mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); + mlx5e_rep_tc_cleanup(rpriv); } static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) @@ -1897,13 +1048,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event return NOTIFY_OK; } - if (event == MLX5_DEV_EVENT_PORT_AFFINITY) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); - - return NOTIFY_OK; - } + if (event == MLX5_DEV_EVENT_PORT_AFFINITY) + return mlx5e_rep_tc_event_port_affinity(priv); return NOTIFY_DONE; } @@ -1912,7 +1058,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_rep_priv *rpriv = priv->ppriv; u16 max_mtu; netdev->min_mtu = ETH_MIN_MTU; @@ -1920,8 +1065,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); mlx5e_set_dev_port_mtu(priv); - INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, - mlx5e_tc_reoffload_flows_work); + mlx5e_rep_tc_enable(priv); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; @@ -1933,11 +1077,10 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_rep_priv *rpriv = priv->ppriv; mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); - cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); + mlx5e_rep_tc_disable(priv); mlx5_lag_remove(mdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 6a2337900420..93e911baacad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry { enum { /* set when the encap entry is successfully offloaded into HW */ MLX5_ENCAP_ENTRY_VALID = BIT(0), + MLX5_REFORMAT_DECAP = BIT(1), +}; + +struct mlx5e_decap_key { + struct ethhdr key; +}; + +struct mlx5e_decap_entry { + struct mlx5e_decap_key key; + struct list_head flows; + struct hlist_node hlist; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct mlx5_pkt_reformat *pkt_reformat; + struct rcu_head rcu; }; struct mlx5e_encap_entry { @@ -203,11 +219,6 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); -void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); - void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); bool mlx5e_eswitch_rep(struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fdba52136c5d..6b3c82da199c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,6 +42,7 @@ #include "en_tc.h" #include "eswitch.h" #include "en_rep.h" +#include "en/rep/tc.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/tls_rxtx.h" @@ -1237,12 +1238,12 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); - if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) goto free_wqe; napi_gro_receive(rq->cq.napi, skb); - mlx5_tc_rep_post_napi_receive(&tc_priv); + mlx5_rep_tc_post_napi_receive(&tc_priv); free_wqe: mlx5e_free_rx_wqe(rq, wi, true); @@ -1293,12 +1294,12 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) goto mpwrq_cqe_out; napi_gro_receive(rq->cq.napi, skb); - mlx5_tc_rep_post_napi_receive(&tc_priv); + mlx5_rep_tc_post_napi_receive(&tc_priv); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a050808f2128..cc669ea450ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -31,6 +31,7 @@ */ #include <net/flow_dissector.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> @@ -45,10 +46,14 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_mpls.h> #include <net/arp.h> #include <net/ipv6_stubs.h> +#include <net/bareudp.h> #include "en.h" #include "en_rep.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" #include "en_tc.h" #include "eswitch.h" #include "esw/chains.h" @@ -89,6 +94,7 @@ enum { MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -122,6 +128,11 @@ struct mlx5e_tc_flow { u64 cookie; unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported * destinations. @@ -153,40 +164,12 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5_flow_spec spec; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) -struct tunnel_match_key { - struct flow_dissector_key_control enc_control; - struct flow_dissector_key_keyid enc_key_id; - struct flow_dissector_key_ports enc_tp; - struct flow_dissector_key_ip enc_ip; - union { - struct flow_dissector_key_ipv4_addrs enc_ipv4; - struct flow_dissector_key_ipv6_addrs enc_ipv6; - }; - - int filter_ifindex; -}; - -struct tunnel_match_enc_opts { - struct flow_dissector_key_enc_opts key; - struct flow_dissector_key_enc_opts mask; -}; - -/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. - * Upper TUNNEL_INFO_BITS for general tunnel info. - * Lower ENC_OPTS_BITS bits for enc_opts. - */ -#define TUNNEL_INFO_BITS 6 -#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) -#define ENC_OPTS_BITS 2 -#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) -#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) -#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) - struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, @@ -1149,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -1324,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + return err; + } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; @@ -1433,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(attr->counter_dev, attr->counter); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1709,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr kfree_rcu(e, rcu); } +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1721,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, int out_index) { @@ -1744,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_encap_dealloc(priv, e); } +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; @@ -2015,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -2098,6 +2145,20 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; } +static bool skip_key_basic(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + /* When doing mpls over udp decap, the user needs to provide + * MPLS_UC as the protocol in order to be able to match on mpls + * label fields. However, the actual ethertype is IP so we want to + * avoid matching on this, otherwise we'll fail the match. + */ + if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) + return true; + + return false; +} + static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2142,7 +2203,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); @@ -2172,7 +2234,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (err) return err; - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && + !skip_key_basic(filter_dev, f)) { struct flow_match_basic match; flow_rule_match_basic(rule, &match); @@ -2837,10 +2900,12 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) static const struct pedit_headers zero_masks = {}; -static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) +static int +parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; int err = -EOPNOTSUPP; @@ -2876,6 +2941,46 @@ out_err: return err; } +static int +parse_pedit_to_reformat(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + u32 mask, val, offset; + u32 *p; + + if (act->id != FLOW_ACTION_MANGLE) + return -EOPNOTSUPP; + + if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { + NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + p = (u32 *)&parse_attr->eth; + *(p + (offset >> 2)) |= (val & mask); + + return 0; +} + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) + return parse_pedit_to_reformat(priv, act, parse_attr, extack); + + return parse_pedit_to_modify_hdr(priv, act, namespace, + parse_attr, hdrs, extack); +} + static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, @@ -3134,7 +3239,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL); + err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; @@ -3200,7 +3305,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - hdrs, extack); + parse_attr, hdrs, NULL, extack); if (err) return err; @@ -3294,12 +3399,22 @@ static inline int cmp_encap_info(struct encap_key *a, a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } +static inline int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); } +static inline int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -3314,13 +3429,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, same_hw_devs(priv, peer_priv)); } - - bool mlx5e_encap_take(struct mlx5e_encap_entry *e) { return refcount_inc_not_zero(&e->refcnt); } +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + static struct mlx5e_encap_entry * mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, uintptr_t hash_key) @@ -3341,6 +3459,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, return NULL; } +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) { size_t tun_size = sizeof(*tun_info) + tun_info->options_len; @@ -3486,6 +3622,84 @@ out_err_init: return err; } +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err; + + parse_attr = attr->parse_attr; + if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = parse_attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, + sizeof(parse_attr->eth), + &parse_attr->eth, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -3697,7 +3911,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct net_device *filter_dev) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3711,6 +3926,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; + bool mpls_push = false; if (!flow_action_has_entries(flow_action)) return -EINVAL; @@ -3725,15 +3941,48 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; break; + case FLOW_ACTION_MPLS_PUSH: + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls push is supported only for mpls_uc protocol"); + return -EOPNOTSUPP; + } + mpls_push = true; + break; + case FLOW_ACTION_MPLS_POP: + /* we only support mpls pop if it is the first action + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if (i) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only as first action"); + return -EOPNOTSUPP; + } + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only on bareudp devices"); + return -EOPNOTSUPP; + } + + parse_attr->eth.h_proto = act->mpls_pop.proto; + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(flow, L3_TO_L2_DECAP); + break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - hdrs, extack); + parse_attr, hdrs, flow, extack); if (err) return err; - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->split_count = attr->out_count; + if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->split_count = attr->out_count; + } break; case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, @@ -3755,6 +4004,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EINVAL; } + if (mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls is supported only through a bareudp device"); + return -EOPNOTSUPP; + } + if (ft_flow && out_dev == priv->netdev) { /* Ignore forward to self rules generated * by adding both mlx5 devs to the flow table @@ -4085,6 +4340,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->mod_hdr); INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); @@ -4154,7 +4410,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); if (err) goto err_free; @@ -4806,148 +5062,35 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) mutex_unlock(&rpriv->unready_flows_lock); } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) -static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv, - u32 tunnel_id) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct tunnel_match_enc_opts enc_opts = {}; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct metadata_dst *tun_dst; - struct tunnel_match_key key; - u32 tun_id, enc_opts_id; - struct net_device *dev; - int err; - - enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; - tun_id = tunnel_id >> ENC_OPTS_BITS; - - if (!tun_id) - return true; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); - if (err) { - WARN_ON_ONCE(true); - netdev_dbg(priv->netdev, - "Couldn't find tunnel for tun_id: %d, err: %d\n", - tun_id, err); - return false; - } - - if (enc_opts_id) { - err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, - enc_opts_id, &enc_opts); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", - enc_opts_id, err); - return false; - } - } - - tun_dst = tun_rx_dst(enc_opts.key.len); - if (!tun_dst) { - WARN_ON_ONCE(true); - return false; - } - - ip_tunnel_key_init(&tun_dst->u.tun_info.key, - key.enc_ipv4.src, key.enc_ipv4.dst, - key.enc_ip.tos, key.enc_ip.ttl, - 0, /* label */ - key.enc_tp.src, key.enc_tp.dst, - key32_to_tunnel_id(key.enc_key_id.keyid), - TUNNEL_KEY); - - if (enc_opts.key.len) - ip_tunnel_info_opts_set(&tun_dst->u.tun_info, - enc_opts.key.data, - enc_opts.key.len, - enc_opts.key.dst_opt_type); - - skb_dst_set(skb, (struct dst_entry *)tun_dst); - dev = dev_get_by_index(&init_net, key.filter_ifindex); - if (!dev) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel device with ifindex: %d\n", - key.filter_ifindex); - return false; +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, + unsigned long flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; } - - /* Set tun_dev so we do dev_put() after datapath */ - tc_priv->tun_dev = dev; - - skb->dev = dev; - - return true; } -#endif /* CONFIG_NET_TC_SKB_EXT */ -bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv) +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct tc_skb_ext *tc_skb_ext; - struct mlx5_eswitch *esw; - struct mlx5e_priv *priv; - int tunnel_moffset; - int err; - - reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); - if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) - reg_c0 = 0; - reg_c1 = be32_to_cpu(cqe->ft_metadata); - - if (!reg_c0) - return true; - - priv = netdev_priv(skb->dev); - esw = priv->mdev->priv.eswitch; - - err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find chain for chain tag: %d, err: %d\n", - reg_c0, err); - return false; - } - - if (chain) { - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); - if (!tc_skb_ext) { - WARN_ON(1); - return false; - } - - tc_skb_ext->chain = chain; + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); + struct mlx5e_priv *priv = cb_priv; - tuple_id = reg_c1 & TUPLE_ID_MAX; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) - return false; + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); + default: + return -EOPNOTSUPP; } - - tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; - tunnel_id = reg_c1 >> (8 * tunnel_moffset); - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); -#endif /* CONFIG_NET_TC_SKB_EXT */ - - return true; -} - -void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->tun_dev) - dev_put(tc_priv->tun_dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index abdcfa4c4e0e..037aa73bf9ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -34,11 +34,41 @@ #define __MLX5_EN_TC_H__ #include <net/pkt_cls.h> +#include "en.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff #ifdef CONFIG_MLX5_ESWITCH +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +struct tunnel_match_enc_opts { + struct flow_dissector_key_enc_opts key; + struct flow_dissector_key_enc_opts mask; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + enum { MLX5E_TC_FLAG_INGRESS_BIT, MLX5E_TC_FLAG_EGRESS_BIT, @@ -50,9 +80,6 @@ enum { #define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) -int mlx5e_tc_nic_init(struct mlx5e_priv *priv); -void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); - int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); @@ -119,11 +146,6 @@ struct mlx5e_tc_update_priv { struct net_device *tun_dev; }; -bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv); - -void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); - struct mlx5e_tc_mod_hdr_acts { int num_actions; int max_actions; @@ -148,6 +170,22 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); struct mlx5e_tc_flow; u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv); +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv); + +#else /* CONFIG_MLX5_CLS_ACT */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_MLX5_CLS_ACT */ + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} @@ -156,6 +194,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, { return 0; } + +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h index f8c4239846ea..7679ac359e31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h @@ -6,6 +6,8 @@ #include "eswitch.h" +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); bool @@ -46,4 +48,21 @@ void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) {} + +static inline struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); } + +static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; } +static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + #endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c5eb4e7754a9..ac79b7c9aeb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.encap_tbl); mutex_init(&esw->offloads.mod_hdr.lock); hash_init(esw->offloads.mod_hdr.hlist); + mutex_init(&esw->offloads.decap_tbl_lock); + hash_init(esw->offloads.decap_tbl); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); + mutex_destroy(&esw->offloads.decap_tbl_lock); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4a1c6c78bb14..ccbbea3e0505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,8 @@ struct mlx5_esw_offload { struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); + struct mutex decap_tbl_lock; /* protects decap_tbl */ + DECLARE_HASHTABLE(decap_tbl, 8); struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ @@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr { struct mlx5_flow_table *fdb; struct mlx5_flow_table *dest_ft; struct mlx5_ct_attr ct_attr; + struct mlx5_pkt_reformat *decap_pkt_reformat; struct mlx5e_tc_flow_parse_attr *parse_attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 57ac2ef52e80..554fc64d8ef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } } } + + if (attr->decap_pkt_reformat) + flow_act.pkt_reformat = attr->decap_pkt_reformat; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter_id = mlx5_fc_id(attr->counter); @@ -1727,7 +1731,9 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) mlx5e_tc_clean_fdb_peer_flows(esw); +#endif esw_del_fdb_peer_miss_rules(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c index 8809a65ecefb..e042e0924079 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -144,11 +144,11 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, int reformat_type) { - /* the default is error for unknown (non VXLAN/GRE tunnel types) */ int err = -EOPNOTSUPP; mutex_lock(&tun_entropy->lock); - if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN && + if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN || + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) && tun_entropy->enabled) { /* in case entropy calculation is enabled for all tunneling * types, it is ok for VXLAN, so approve. diff --git a/include/net/bareudp.h b/include/net/bareudp.h index cb03f6f15956..dc65a0d71d9b 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/skbuff.h> +#include <net/rtnetlink.h> struct bareudp_conf { __be16 ethertype; @@ -17,4 +18,10 @@ struct net_device *bareudp_dev_create(struct net *net, const char *name, u8 name_assign_type, struct bareudp_conf *info); +static inline bool netif_is_bareudp(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "bareudp"); +} + #endif |