Diffstat:
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig              |   17
 drivers/net/ethernet/mellanox/mlx5/core/Makefile             |   10
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c       |  368
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h       |   34
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c          |  711
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h          |   81
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c          |   10
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h          |    2
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c |  137
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c            |   41
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c             |  887
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h             |   21
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c              |    9
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c              |  509
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h              |   58
 drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h         |   19
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c            |    3
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h            |    3
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   |    6
 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c       |    4
 include/net/bareudp.h                                        |    7
 21 files changed, 1816 insertions(+), 1121 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 7d69a3061f17..4256d59eca2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -78,9 +78,24 @@ config MLX5_ESWITCH
Legacy SRIOV mode (L2 mac vlan steering based).
Switchdev mode (eswitch offloads).
+config MLX5_CLS_ACT
+ bool "MLX5 TC classifier action support"
+ depends on MLX5_ESWITCH && NET_CLS_ACT
+ default y
+ help
+ mlx5 ConnectX offloads support for TC classifier actions (NET_CLS_ACT),
+ works in both native NIC mode and Switchdev SRIOV mode.
+ Actions get attached to hardware-offloaded classifiers and are
+ invoked after a successful classification. Actions are used to
+ overwrite the classification result, instantly drop or redirect and/or
+ reformat packets at wire speed without involving the host CPU.
+
+ If set to N, TC offloads in both NIC and switchdev modes will be disabled.
+ If unsure, set to Y.
+
config MLX5_TC_CT
bool "MLX5 TC connection tracking offload support"
- depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
+ depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
default y
help
Say Y here if you want to support offloading connection tracking rules
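[ Usage sketch, not part of the patch: with CONFIG_MLX5_CLS_ACT=y, a flower
  classifier plus action can be offloaded on a representor netdev; the
  device name $REP and the address below are placeholder values.

      tc qdisc add dev $REP ingress
      tc filter add dev $REP ingress protocol ip flower skip_sw \
          dst_ip 192.0.2.1 action drop ]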
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d3c7dbd7f1d5..e5ee9103fefb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -33,17 +33,19 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
- lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
- en/tc_tun_geneve.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
+ en/mapping.o esw/chains.o en/tc_tun.o \
+ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
+ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
#
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
- ecpf.o rdma.o esw/chains.o
+ ecpf.o rdma.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000000..baa162432e75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/rwlock.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt)) {
+ mlx5e_rep_neigh_entry_remove(nhe);
+ kfree_rcu(nhe, rcu);
+ }
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh_hash_entry *next = NULL;
+
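+ /* Walk the RCU list starting past @nhe (or from the head when @nhe
+ * is NULL) and take a reference on the first entry that is still
+ * alive; the caller's previous entry is released below, so repeated
+ * calls iterate the whole list without holding rcu_read_lock across
+ * calls.
+ */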
+ rcu_read_lock();
+
+ for (next = nhe ?
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &nhe->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list) :
+ list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list);
+ next;
+ next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &next->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list))
+ if (mlx5e_rep_neigh_entry_hold(next))
+ break;
+
+ rcu_read_unlock();
+
+ if (nhe)
+ mlx5e_rep_neigh_entry_release(nhe);
+
+ return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and its hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ if (!mlx5e_encap_take(e))
+ continue;
+
+ priv = netdev_priv(e->out_dev);
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ mlx5e_encap_put(priv, e);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe,
+ struct neighbour *n)
+{
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+
+ /* This assignment is valid as long as the neigh reference
+ * is taken.
+ */
+ nhe->n = n;
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe)
+ return NOTIFY_DONE;
+
+ mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check that the device is present since we don't care
+ * about changes in the default table; we only care about
+ * changes to the per-device delay probe time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+ neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ mutex_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ mutex_destroy(&neigh_update->encap_lock);
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+
+ list_del_rcu(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside rcu read lock section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+ return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ (*nhe)->priv = priv;
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ spin_lock_init(&(*nhe)->encap_list_lock);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000000..32b239189c95
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000000..c609a5e50ebc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "neigh.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "esw/chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+
+ struct list_head list;
+};
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err) {
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
+ return err;
+ }
+ }
+
+ e->nhe = nhe;
+ spin_lock(&nhe->encap_list_lock);
+ list_add_rcu(&e->encap_list, &nhe->encap_list);
+ spin_unlock(&nhe->encap_list_lock);
+
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+
+ if (!e->nhe)
+ return;
+
+ spin_lock(&e->nhe->encap_list_lock);
+ list_del_rcu(&e->encap_list);
+ spin_unlock(&e->nhe->encap_list_lock);
+
+ mlx5e_rep_neigh_entry_release(e->nhe);
+ e->nhe = NULL;
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+}
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool encap_connected;
+ LIST_HEAD(flow_list);
+
+ ASSERT_RTNL();
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&e->res_ready);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+ ether_addr_equal(e->h_dest, ha)))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+ mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+ /* Update the encap source mac, in case we delete
+ * the flows when the encap source mac changes.
+ */
+ ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
+
+ mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+ }
+unlock:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ mlx5e_put_encap_flow_list(priv, &flow_list);
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload tmp, *f = type_data;
+ struct mlx5e_priv *priv = cb_priv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ flags = MLX5_TC_FLAG(INGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+ esw = priv->mdev->priv.eswitch;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ if (!mlx5_esw_chains_prios_supported(esw))
+ return -EOPNOTSUPP;
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
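+ *
+ * E.g. (illustrative): an FT rule submitted with chain 0, prio 0 is
+ * replayed below on the reserved chain with prio 1.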
+ */
+ if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+ return -EOPNOTSUPP;
+ if (tmp.common.chain_index != 0)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.prio++;
+ err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_tc_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
+ case TC_SETUP_FT:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_ft_cb_list,
+ mlx5e_rep_setup_ft_cb,
+ priv, priv, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ mutex_init(&uplink_priv->unready_flows_lock);
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ return err;
+}
+
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+ mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+ return NOTIFY_OK;
+}
+
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ /* All callback list access should be protected by RTNL. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+
+ return NULL;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev);
+
+void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+ struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+
+ list_for_each_entry_safe(cb_priv, temp, head, list) {
+ mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
+ kfree(cb_priv);
+ }
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct flow_cls_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv,
+ unsigned long flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int err = 0;
+
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+ struct flow_cls_offload *f = type_data;
+ struct flow_cls_offload tmp;
+ struct mlx5e_priv *mpriv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ mpriv = netdev_priv(priv->rpriv->netdev);
+ esw = mpriv->mdev->priv.eswitch;
+
+ flags = MLX5_TC_FLAG(EGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (!mlx5_esw_chains_prios_supported(esw) ||
+ tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
+ tmp.common.chain_index)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.prio++;
+ err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ f->unlocked_driver_cb = true;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+ mlx5e_rep_indr_block_unbind);
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
+ mlx5e_rep_indr_setup_tc_cb);
+ case TC_SETUP_FT:
+ return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
+ mlx5e_rep_indr_setup_ft_cb);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ int err;
+
+ err = __flow_indr_block_cb_register(netdev, rpriv,
+ mlx5e_rep_indr_setup_cb,
+ rpriv);
+ if (err) {
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+ netdev_name(netdev), err);
+ }
+ return err;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb,
+ rpriv);
+}
+
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ uplink_priv.netdevice_nb);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
+ return NOTIFY_OK;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ mlx5e_rep_indr_register_block(rpriv, netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ mlx5e_rep_indr_unregister_block(rpriv, netdev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+ uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+ err = register_netdevice_notifier_dev_net(rpriv->netdev,
+ &uplink_priv->netdevice_nb,
+ &uplink_priv->netdevice_nn);
+ return err;
+}
+
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+ /* clean indirect TC block notifications */
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &uplink_priv->netdevice_nb,
+ &uplink_priv->netdevice_nn);
+}
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv,
+ u32 tunnel_id)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct tunnel_match_enc_opts enc_opts = {};
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct metadata_dst *tun_dst;
+ struct tunnel_match_key key;
+ u32 tun_id, enc_opts_id;
+ struct net_device *dev;
+ int err;
+
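+ /* tunnel_id packs two mapping ids: the low ENC_OPTS_BITS index the
+ * enc-opts mapping, the remaining high bits index the tunnel match
+ * mapping.
+ */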
+ enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+ tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+ if (!tun_id)
+ return true;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+ if (err) {
+ WARN_ON_ONCE(true);
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel for tun_id: %d, err: %d\n",
+ tun_id, err);
+ return false;
+ }
+
+ if (enc_opts_id) {
+ err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id, &enc_opts);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+ enc_opts_id, err);
+ return false;
+ }
+ }
+
+ tun_dst = tun_rx_dst(enc_opts.key.len);
+ if (!tun_dst) {
+ WARN_ON_ONCE(true);
+ return false;
+ }
+
+ ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+ key.enc_ipv4.src, key.enc_ipv4.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ 0, /* label */
+ key.enc_tp.src, key.enc_tp.dst,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ TUNNEL_KEY);
+
+ if (enc_opts.key.len)
+ ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+ enc_opts.key.data,
+ enc_opts.key.len,
+ enc_opts.key.dst_opt_type);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+ dev = dev_get_by_index(&init_net, key.filter_ifindex);
+ if (!dev) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel device with ifindex: %d\n",
+ key.filter_ifindex);
+ return false;
+ }
+
+ /* Set tun_dev so we do dev_put() after datapath */
+ tc_priv->tun_dev = dev;
+
+ skb->dev = dev;
+
+ return true;
+}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ int tunnel_moffset;
+ int err;
+
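+ /* reg_c0 carries the hardware chain tag for the packet; reg_c1
+ * carries the CT tuple id in its low bits and the tunnel mapping id
+ * in its upper bytes (split at the end of this function).
+ */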
+ reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+ if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+ reg_c0 = 0;
+ reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
+ if (!reg_c0)
+ return true;
+
+ priv = netdev_priv(skb->dev);
+ esw = priv->mdev->priv.eswitch;
+
+ err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find chain for chain tag: %d, err: %d\n",
+ reg_c0, err);
+ return false;
+ }
+
+ if (chain) {
+ tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+ if (!tc_skb_ext) {
+ WARN_ON(1);
+ return false;
+ }
+
+ tc_skb_ext->chain = chain;
+
+ tuple_id = reg_c1 & TUPLE_ID_MAX;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
+ return false;
+ }
+
+ tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
+ tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return true;
+}
+
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (tc_priv->tun_dev)
+ dev_put(tc_priv->tun_dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000000..86f92abf2fdd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en_tc.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN]);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv);
+
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv);
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data) { return -EOPNOTSUPP; }
+
+static inline void
+mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) {}
+
+struct mlx5e_tc_update_priv;
+static inline bool
+mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv) { return true; }
+static inline void
+mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index b45c3f46570b..e99382f58807 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -4,8 +4,11 @@
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
+#include <net/bareudp.h>
#include "en/tc_tun.h"
#include "en_tc.h"
+#include "rep/tc.h"
+#include "rep/neigh.h"
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
@@ -16,6 +19,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
else if (netif_is_gretap(tunnel_dev) ||
netif_is_ip6gretap(tunnel_dev))
return &gre_tunnel;
+ else if (netif_is_bareudp(tunnel_dev))
+ return &mplsoudp_tunnel;
else
return NULL;
}
@@ -96,9 +101,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
}
rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
ip_rt_put(rt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 1630f0ec3ad7..704359df6095 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -16,6 +16,7 @@ enum {
MLX5E_TC_TUNNEL_TYPE_VXLAN,
MLX5E_TC_TUNNEL_TYPE_GENEVE,
MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
};
struct mlx5e_tc_tunnel {
@@ -46,6 +47,7 @@ struct mlx5e_tc_tunnel {
extern struct mlx5e_tc_tunnel vxlan_tunnel;
extern struct mlx5e_tc_tunnel geneve_tunnel;
extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000000..98ee62e427d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &mplsoudp_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ return 0;
+}
+
+static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl)
+{
+ u32 res;
+
+ /* The mpls label is 32 bits long and constructed as follows:
+ * 20 bits label
+ * 3 bits tos
+ * 1 bit bottom of stack. Since we support only one label, this bit is
+ * always set.
+ * 8 bits TTL
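+ * E.g. (illustrative): label 100, tos 0, ttl 64 gives
+ * (100 << 12) | (1 << 8) | 64 = 0x64140.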
+ */
+ res = be32_to_cpu(label) << 12 | 1 << 8 | (tos & 7) << 9 | ttl;
+ return cpu_to_be32(res);
+}
+
+static int generate_ip_tun_hdr(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *r)
+{
+ const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct mpls_shim_hdr *mpls;
+
+ mpls = (struct mpls_shim_hdr *)(udp + 1);
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl);
+
+ return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_enc_keyid enc_keyid;
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_mpls(rule, &match);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label, match.mask->mpls_label);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_label, match.key->mpls_label);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_exp, match.mask->mpls_tc);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_exp, match.key->mpls_tc);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_s_bos, match.mask->mpls_bos);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_s_bos, match.key->mpls_bos);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_ttl, match.mask->mpls_ttl);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_ttl, match.key->mpls_ttl);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = can_offload,
+ .calc_hlen = calc_hlen,
+ .init_encap_attr = init_encap_attr,
+ .generate_ip_tun_hdr = generate_ip_tun_hdr,
+ .parse_udp_ports = parse_udp_ports,
+ .parse_tunnel = parse_tunnel,
+};
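[ Usage sketch, not part of the patch: MPLS-over-UDP rules are expressed
  through a bareudp device; the names and values below are placeholders.

      ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc
      tc filter add dev bareudp0 ingress protocol mpls_uc flower skip_sw \
          mpls_label 100 action mirred egress redirect dev $VF_REP ]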
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8b86c6ded302..8ff9cbe6943d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3520,41 +3520,6 @@ out:
return err;
}
-#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct flow_cls_offload *cls_flower,
- unsigned long flags)
-{
- switch (cls_flower->command) {
- case FLOW_CLS_REPLACE:
- return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_DESTROY:
- return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_STATS:
- return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-#endif
-
static LIST_HEAD(mlx5e_block_cb_list);
static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -3563,7 +3528,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
struct mlx5e_priv *priv = netdev_priv(dev);
switch (type) {
-#ifdef CONFIG_MLX5_ESWITCH
case TC_SETUP_BLOCK: {
struct flow_block_offload *f = type_data;
@@ -3573,7 +3537,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
mlx5e_setup_tc_block_cb,
priv, priv, true);
}
-#endif
case TC_SETUP_QDISC_MQPRIO:
return mlx5e_setup_tc_mqprio(priv, type_data);
default:
@@ -3746,7 +3709,7 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
return 0;
}
-#ifdef CONFIG_MLX5_ESWITCH
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -3857,7 +3820,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
set_feature_cvlan_filter);
-#ifdef CONFIG_MLX5_ESWITCH
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
#endif
err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 52351c105627..a46405c6d560 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -35,7 +35,6 @@
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
-#include <net/netevent.h>
#include <net/arp.h>
#include <net/devlink.h>
#include <net/ipv6_stubs.h>
@@ -45,9 +44,9 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
-#include "en/tc_tun.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
#include "fs_core.h"
-#include "lib/port_tun.h"
#include "lib/mlx5.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
@@ -58,16 +57,6 @@
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
-struct mlx5e_rep_indr_block_priv {
- struct net_device *netdev;
- struct mlx5e_rep_priv *rpriv;
-
- struct list_head list;
-};
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev);
-
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
@@ -485,706 +474,6 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
mlx5e_sqs2vport_stop(esw, rep);
}
-static unsigned long mlx5e_rep_ipv6_interval(void)
-{
- if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
- return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
-
- return ~0UL;
-}
-
-static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
-{
- unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
- unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
- mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
-}
-
-void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-
- mlx5_fc_queue_stats_work(priv->mdev,
- &neigh_update->neigh_stats_work,
- neigh_update->min_interval);
-}
-
-static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
-{
- return refcount_inc_not_zero(&nhe->refcnt);
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
-
-static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
-{
- if (refcount_dec_and_test(&nhe->refcnt)) {
- mlx5e_rep_neigh_entry_remove(nhe);
- kfree_rcu(nhe, rcu);
- }
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_neigh_hash_entry *next = NULL;
-
- rcu_read_lock();
-
- for (next = nhe ?
- list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &nhe->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list) :
- list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list);
- next;
- next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
- &next->neigh_list,
- struct mlx5e_neigh_hash_entry,
- neigh_list))
- if (mlx5e_rep_neigh_entry_hold(next))
- break;
-
- rcu_read_unlock();
-
- if (nhe)
- mlx5e_rep_neigh_entry_release(nhe);
-
- return next;
-}
-
-static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
-{
- struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
- neigh_update.neigh_stats_work.work);
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
-
- rtnl_lock();
- if (!list_empty(&rpriv->neigh_update.neigh_list))
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
- mlx5e_tc_update_neigh_used_value(nhe);
-
- rtnl_unlock();
-}
-
-static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- bool neigh_connected,
- unsigned char ha[ETH_ALEN])
-{
- struct ethhdr *eth = (struct ethhdr *)e->encap_header;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- bool encap_connected;
- LIST_HEAD(flow_list);
-
- ASSERT_RTNL();
-
- /* wait for encap to be fully initialized */
- wait_for_completion(&e->res_ready);
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
- if (e->compl_result < 0 || (encap_connected == neigh_connected &&
- ether_addr_equal(e->h_dest, ha)))
- goto unlock;
-
- mlx5e_take_all_encap_flows(e, &flow_list);
-
- if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
- (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
- mlx5e_tc_encap_flows_del(priv, e, &flow_list);
-
- if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
- ether_addr_copy(e->h_dest, ha);
- ether_addr_copy(eth->h_dest, ha);
- /* Update the encap source mac, in case that we delete
- * the flows when encap source mac changed.
- */
- ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
-
- mlx5e_tc_encap_flows_add(priv, e, &flow_list);
- }
-unlock:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- mlx5e_put_encap_flow_list(priv, &flow_list);
-}
-
-static void mlx5e_rep_neigh_update(struct work_struct *work)
-{
- struct mlx5e_neigh_hash_entry *nhe =
- container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
- struct neighbour *n = nhe->n;
- struct mlx5e_encap_entry *e;
- unsigned char ha[ETH_ALEN];
- struct mlx5e_priv *priv;
- bool neigh_connected;
- u8 nud_state, dead;
-
- rtnl_lock();
-
- /* If these parameters are changed after we release the lock,
- * we'll receive another event letting us know about it.
- * We use this lock to avoid inconsistency between the neigh validity
- * and it's hw address.
- */
- read_lock_bh(&n->lock);
- memcpy(ha, n->ha, ETH_ALEN);
- nud_state = n->nud_state;
- dead = n->dead;
- read_unlock_bh(&n->lock);
-
- neigh_connected = (nud_state & NUD_VALID) && !dead;
-
- trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
-
- list_for_each_entry(e, &nhe->encap_list, encap_list) {
- if (!mlx5e_encap_take(e))
- continue;
-
- priv = netdev_priv(e->out_dev);
- mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
- mlx5e_encap_put(priv, e);
- }
- mlx5e_rep_neigh_entry_release(nhe);
- rtnl_unlock();
- neigh_release(n);
-}
-
-static struct mlx5e_rep_indr_block_priv *
-mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv;
-
- /* All callback list access should be protected by RTNL. */
- ASSERT_RTNL();
-
- list_for_each_entry(cb_priv,
- &rpriv->uplink_priv.tc_indr_block_priv_list,
- list)
- if (cb_priv->netdev == netdev)
- return cb_priv;
-
- return NULL;
-}
-
-static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
- struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
-
- list_for_each_entry_safe(cb_priv, temp, head, list) {
- mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
- kfree(cb_priv);
- }
-}
-
-static int
-mlx5e_rep_indr_offload(struct net_device *netdev,
- struct flow_cls_offload *flower,
- struct mlx5e_rep_indr_block_priv *indr_priv,
- unsigned long flags)
-{
- struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
- int err = 0;
-
- switch (flower->command) {
- case FLOW_CLS_REPLACE:
- err = mlx5e_configure_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_DESTROY:
- err = mlx5e_delete_flower(netdev, priv, flower, flags);
- break;
- case FLOW_CLS_STATS:
- err = mlx5e_stats_flower(netdev, priv, flower, flags);
- break;
- default:
- err = -EOPNOTSUPP;
- }
-
- return err;
-}
-
-static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
- void *type_data, void *indr_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_rep_indr_block_priv *priv = indr_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
- void *type_data, void *indr_priv)
-{
- struct mlx5e_rep_indr_block_priv *priv = indr_priv;
- struct flow_cls_offload *f = type_data;
- struct flow_cls_offload tmp;
- struct mlx5e_priv *mpriv;
- struct mlx5_eswitch *esw;
- unsigned long flags;
- int err;
-
- mpriv = netdev_priv(priv->rpriv->netdev);
- esw = mpriv->mdev->priv.eswitch;
-
- flags = MLX5_TC_FLAG(EGRESS) |
- MLX5_TC_FLAG(ESW_OFFLOAD) |
- MLX5_TC_FLAG(FT_OFFLOAD);
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- memcpy(&tmp, f, sizeof(*f));
-
- /* Re-use tc offload path by moving the ft flow to the
- * reserved ft chain.
- *
- * FT offload can use prio range [0, INT_MAX], so we normalize
- * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
- * as with tc, where prio 0 isn't supported.
- *
- * We only support chain 0 of FT offload.
- */
- if (!mlx5_esw_chains_prios_supported(esw) ||
- tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
- tmp.common.chain_index)
- return -EOPNOTSUPP;
-
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
- tmp.common.prio++;
- err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
- memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
- return err;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static void mlx5e_rep_indr_block_unbind(void *cb_priv)
-{
- struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
-
- list_del(&indr_priv->list);
- kfree(indr_priv);
-}
-
-static LIST_HEAD(mlx5e_block_cb_list);
-
-static int
-mlx5e_rep_indr_setup_block(struct net_device *netdev,
- struct mlx5e_rep_priv *rpriv,
- struct flow_block_offload *f,
- flow_setup_cb_t *setup_cb)
-{
- struct mlx5e_rep_indr_block_priv *indr_priv;
- struct flow_block_cb *block_cb;
-
- if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- return -EOPNOTSUPP;
-
- f->unlocked_driver_cb = true;
- f->driver_block_list = &mlx5e_block_cb_list;
-
- switch (f->command) {
- case FLOW_BLOCK_BIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (indr_priv)
- return -EEXIST;
-
- indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
- if (!indr_priv)
- return -ENOMEM;
-
- indr_priv->netdev = netdev;
- indr_priv->rpriv = rpriv;
- list_add(&indr_priv->list,
- &rpriv->uplink_priv.tc_indr_block_priv_list);
-
- block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
- mlx5e_rep_indr_block_unbind);
- if (IS_ERR(block_cb)) {
- list_del(&indr_priv->list);
- kfree(indr_priv);
- return PTR_ERR(block_cb);
- }
- flow_block_cb_add(block_cb, f);
- list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
-
- return 0;
- case FLOW_BLOCK_UNBIND:
- indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
- if (!indr_priv)
- return -ENOENT;
-
- block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
- if (!block_cb)
- return -ENOENT;
-
- flow_block_cb_remove(block_cb, f);
- list_del(&block_cb->driver_list);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-static
-int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
- enum tc_setup_type type, void *type_data)
-{
- switch (type) {
- case TC_SETUP_BLOCK:
- return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
- mlx5e_rep_indr_setup_tc_cb);
- case TC_SETUP_FT:
- return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
- mlx5e_rep_indr_setup_ft_cb);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- int err;
-
- err = __flow_indr_block_cb_register(netdev, rpriv,
- mlx5e_rep_indr_setup_cb,
- rpriv);
- if (err) {
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
- netdev_name(netdev), err);
- }
- return err;
-}
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
- struct net_device *netdev)
-{
- __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb,
- rpriv);
-}
-
-static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- uplink_priv.netdevice_nb);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
- return NOTIFY_OK;
-
- switch (event) {
- case NETDEV_REGISTER:
- mlx5e_rep_indr_register_block(rpriv, netdev);
- break;
- case NETDEV_UNREGISTER:
- mlx5e_rep_indr_unregister_block(rpriv, netdev);
- break;
- }
- return NOTIFY_OK;
-}
-
-static void
-mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe,
- struct neighbour *n)
-{
- /* Take a reference to ensure the neighbour and mlx5 encap
- * entry won't be destroyed until we drop the reference in
- * the delayed work.
- */
- neigh_hold(n);
-
- /* This assignment is valid as long as the neigh reference
- * is taken
- */
- nhe->n = n;
-
- if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
- mlx5e_rep_neigh_entry_release(nhe);
- neigh_release(n);
- }
-}
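/* A minimal sketch (not part of this patch) of the "one reference per
 * queued work" rule the comments above describe: take the reference
 * first, queue, and undo it only when queue_work() reports the work was
 * already pending. refcounted_queue_work() and obj_hold()/obj_put() are
 * hypothetical names used for illustration.
 */
static void refcounted_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work, void *obj)
{
	obj_hold(obj);			/* pin obj for the work handler */
	if (!queue_work(wq, work))
		obj_put(obj);		/* already queued: drop the extra ref */
}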
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh);
-
-static int mlx5e_rep_netevent_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
- neigh_update.netevent_nb);
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_neigh_hash_entry *nhe = NULL;
- struct mlx5e_neigh m_neigh = {};
- struct neigh_parms *p;
- struct neighbour *n;
- bool found = false;
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- n = ptr;
-#if IS_ENABLED(CONFIG_IPV6)
- if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
-#else
- if (n->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- m_neigh.dev = n->dev;
- m_neigh.family = n->ops->family;
- memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
- rcu_read_lock();
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
- rcu_read_unlock();
- if (!nhe)
- return NOTIFY_DONE;
-
- mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
- break;
-
- case NETEVENT_DELAY_PROBE_TIME_UPDATE:
- p = ptr;
-
- /* We check that the device is present since we don't care about
- * changes in the default table; we only care about changes
- * made to the per-device delay probe time parameter.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
-#else
- if (!p->dev || p->tbl != &arp_tbl)
-#endif
- return NOTIFY_DONE;
-
- rcu_read_lock();
- list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
- neigh_list) {
- if (p->dev == nhe->m_neigh.dev) {
- found = true;
- break;
- }
- }
- rcu_read_unlock();
- if (!found)
- return NOTIFY_DONE;
-
- neigh_update->min_interval = min_t(unsigned long,
- NEIGH_VAR(p, DELAY_PROBE_TIME),
- neigh_update->min_interval);
- mlx5_fc_update_sampling_interval(priv->mdev,
- neigh_update->min_interval);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static const struct rhashtable_params mlx5e_neigh_ht_params = {
- .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
- .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
- .key_len = sizeof(struct mlx5e_neigh),
- .automatic_shrinking = true,
-};
-
-static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- int err;
-
- err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
- if (err)
- return err;
-
- INIT_LIST_HEAD(&neigh_update->neigh_list);
- mutex_init(&neigh_update->encap_lock);
- INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
- mlx5e_rep_neigh_stats_work);
- mlx5e_rep_neigh_update_init_interval(rpriv);
-
- rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
- err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
- if (err)
- goto out_err;
- return 0;
-
-out_err:
- rhashtable_destroy(&neigh_update->neigh_ht);
- return err;
-}
-
-static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
-{
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
- unregister_netevent_notifier(&neigh_update->netevent_nb);
-
- flush_workqueue(priv->wq); /* flush neigh update works */
-
- cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
-
- mutex_destroy(&neigh_update->encap_lock);
- rhashtable_destroy(&neigh_update->neigh_ht);
-}
-
-static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
- struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- int err;
-
- err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- if (err)
- return err;
-
- list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
-
- return err;
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
-
- list_del_rcu(&nhe->neigh_list);
-
- rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
- &nhe->rhash_node,
- mlx5e_neigh_ht_params);
- mutex_unlock(&rpriv->neigh_update.encap_lock);
-}
-
-/* This function must only be called under the representor's encap_lock or
- * inside an RCU read-side section.
- */
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
- struct mlx5e_neigh *m_neigh)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- struct mlx5e_neigh_hash_entry *nhe;
-
- nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
- mlx5e_neigh_ht_params);
- return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
-}
-
-static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct mlx5e_neigh_hash_entry **nhe)
-{
- int err;
-
- *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
- if (!*nhe)
- return -ENOMEM;
-
- (*nhe)->priv = priv;
- memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
- INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
- spin_lock_init(&(*nhe)->encap_list_lock);
- INIT_LIST_HEAD(&(*nhe)->encap_list);
- refcount_set(&(*nhe)->refcnt, 1);
-
- err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
- if (err)
- goto out_free;
- return 0;
-
-out_free:
- kfree(*nhe);
- return err;
-}
-
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
- struct mlx5e_neigh_hash_entry *nhe;
- int err;
-
- err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
- if (err)
- return err;
-
- mutex_lock(&rpriv->neigh_update.encap_lock);
- nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
- if (!nhe) {
- err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
- if (err) {
- mutex_unlock(&rpriv->neigh_update.encap_lock);
- mlx5_tun_entropy_refcount_dec(tun_entropy,
- e->reformat_type);
- return err;
- }
- }
-
- e->nhe = nhe;
- spin_lock(&nhe->encap_list_lock);
- list_add_rcu(&e->encap_list, &nhe->encap_list);
- spin_unlock(&nhe->encap_list_lock);
-
- mutex_unlock(&rpriv->neigh_update.encap_lock);
-
- return 0;
-}
-
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
- struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
-
- if (!e->nhe)
- return;
-
- spin_lock(&e->nhe->encap_list_lock);
- list_del_rcu(&e->encap_list);
- spin_unlock(&e->nhe->encap_list_lock);
-
- mlx5e_rep_neigh_entry_release(e->nhe);
- e->nhe = NULL;
- mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
-}
-
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -1225,129 +514,6 @@ static int mlx5e_rep_close(struct net_device *dev)
return ret;
}
-static int
-mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
- struct flow_cls_offload *cls_flower, int flags)
-{
- switch (cls_flower->command) {
- case FLOW_CLS_REPLACE:
- return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_DESTROY:
- return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
- flags);
- case FLOW_CLS_STATS:
- return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
- flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static
-int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
- struct tc_cls_matchall_offload *ma)
-{
- switch (ma->command) {
- case TC_CLSMATCHALL_REPLACE:
- return mlx5e_tc_configure_matchall(priv, ma);
- case TC_CLSMATCHALL_DESTROY:
- return mlx5e_tc_delete_matchall(priv, ma);
- case TC_CLSMATCHALL_STATS:
- mlx5e_tc_stats_matchall(priv, ma);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
- case TC_SETUP_CLSMATCHALL:
- return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct flow_cls_offload tmp, *f = type_data;
- struct mlx5e_priv *priv = cb_priv;
- struct mlx5_eswitch *esw;
- unsigned long flags;
- int err;
-
- flags = MLX5_TC_FLAG(INGRESS) |
- MLX5_TC_FLAG(ESW_OFFLOAD) |
- MLX5_TC_FLAG(FT_OFFLOAD);
- esw = priv->mdev->priv.eswitch;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- memcpy(&tmp, f, sizeof(*f));
-
- if (!mlx5_esw_chains_prios_supported(esw))
- return -EOPNOTSUPP;
-
- /* Re-use tc offload path by moving the ft flow to the
- * reserved ft chain.
- *
- * FT offload can use prio range [0, INT_MAX], so we normalize
- * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
- * as with tc, where prio 0 isn't supported.
- *
- * We only support chain 0 of FT offload.
- */
- if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
- return -EOPNOTSUPP;
- if (tmp.common.chain_index != 0)
- return -EOPNOTSUPP;
-
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
- tmp.common.prio++;
- err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
- memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
- return err;
- default:
- return -EOPNOTSUPP;
- }
-}
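/* A worked example of the prio normalization done in both ft callbacks
 * above (illustrative only): FT offload hands the driver a prio in
 * [0, INT_MAX], while the tc path uses [1, max], with prio 0 unsupported.
 * The helper name is hypothetical; the checks mirror
 * mlx5e_rep_setup_ft_cb() and mlx5e_rep_indr_setup_ft_cb().
 */
static int ft_prio_to_tc_prio(u32 ft_prio, u32 max_tc_prio, u32 *tc_prio)
{
	if (ft_prio >= max_tc_prio)
		return -EOPNOTSUPP;	/* prio + 1 would exceed the range */
	*tc_prio = ft_prio + 1;		/* shift past unsupported prio 0 */
	return 0;
}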
-
-static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
-static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
-static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct flow_block_offload *f = type_data;
-
- f->unlocked_driver_cb = true;
-
- switch (type) {
- case TC_SETUP_BLOCK:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_tc_cb_list,
- mlx5e_rep_setup_tc_cb,
- priv, priv, true);
- case TC_SETUP_FT:
- return flow_block_cb_setup_simple(type_data,
- &mlx5e_rep_block_ft_cb_list,
- mlx5e_rep_setup_ft_cb,
- priv, priv, true);
- default:
- return -EOPNOTSUPP;
- }
-}
-
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1791,31 +957,23 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
priv = netdev_priv(netdev);
uplink_priv = &rpriv->uplink_priv;
- mutex_init(&uplink_priv->unready_flows_lock);
- INIT_LIST_HEAD(&uplink_priv->unready_flows);
-
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ err = mlx5e_rep_tc_init(rpriv);
if (err)
return err;
mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
- /* init indirect block notifications */
- INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
- uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
- err = register_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
+ err = mlx5e_rep_tc_netdevice_event_register(rpriv);
if (err) {
- mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
- goto tc_esw_cleanup;
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
+ err);
+ goto tc_rep_cleanup;
}
return 0;
-tc_esw_cleanup:
- mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+tc_rep_cleanup:
+ mlx5e_rep_tc_cleanup(rpriv);
return err;
}
@@ -1845,17 +1003,10 @@ destroy_tises:
static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
{
- struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-
- /* clean indirect TC block notifications */
- unregister_netdevice_notifier_dev_net(rpriv->netdev,
- &uplink_priv->netdevice_nb,
- &uplink_priv->netdevice_nn);
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
mlx5e_rep_indr_clean_block_privs(rpriv);
- /* delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
- mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+ mlx5e_rep_tc_cleanup(rpriv);
}
static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
@@ -1897,13 +1048,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
return NOTIFY_OK;
}
- if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
-
- return NOTIFY_OK;
- }
+ if (event == MLX5_DEV_EVENT_PORT_AFFINITY)
+ return mlx5e_rep_tc_event_port_affinity(priv);
return NOTIFY_DONE;
}
@@ -1912,7 +1058,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
u16 max_mtu;
netdev->min_mtu = ETH_MIN_MTU;
@@ -1920,8 +1065,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
mlx5e_set_dev_port_mtu(priv);
- INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
- mlx5e_tc_reoffload_flows_work);
+ mlx5e_rep_tc_enable(priv);
mlx5_lag_add(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
@@ -1933,11 +1077,10 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
mlx5e_dcbnl_delete_app(priv);
mlx5_notifier_unregister(mdev, &priv->events_nb);
- cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+ mlx5e_rep_tc_disable(priv);
mlx5_lag_remove(mdev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 6a2337900420..93e911baacad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry {
enum {
/* set when the encap entry is successfully offloaded into HW */
MLX5_ENCAP_ENTRY_VALID = BIT(0),
+ MLX5_REFORMAT_DECAP = BIT(1),
+};
+
+struct mlx5e_decap_key {
+ struct ethhdr key;
+};
+
+struct mlx5e_decap_entry {
+ struct mlx5e_decap_key key;
+ struct list_head flows;
+ struct hlist_node hlist;
+ refcount_t refcnt;
+ struct completion res_ready;
+ int compl_result;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct rcu_head rcu;
};
struct mlx5e_encap_entry {
@@ -203,11 +219,6 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe);
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e);
-
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
bool mlx5e_eswitch_rep(struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index fdba52136c5d..6b3c82da199c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,6 +42,7 @@
#include "en_tc.h"
#include "eswitch.h"
#include "en_rep.h"
+#include "en/rep/tc.h"
#include "ipoib/ipoib.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/tls_rxtx.h"
@@ -1237,12 +1238,12 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (rep->vlan && skb_vlan_tag_present(skb))
skb_vlan_pop(skb);
- if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
+ if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))
goto free_wqe;
napi_gro_receive(rq->cq.napi, skb);
- mlx5_tc_rep_post_napi_receive(&tc_priv);
+ mlx5_rep_tc_post_napi_receive(&tc_priv);
free_wqe:
mlx5e_free_rx_wqe(rq, wi, true);
@@ -1293,12 +1294,12 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
- if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
+ if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))
goto mpwrq_cqe_out;
napi_gro_receive(rq->cq.napi, skb);
- mlx5_tc_rep_post_napi_receive(&tc_priv);
+ mlx5_rep_tc_post_napi_receive(&tc_priv);
mpwrq_cqe_out:
if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a050808f2128..cc669ea450ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -31,6 +31,7 @@
*/
#include <net/flow_dissector.h>
+#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
@@ -45,10 +46,14 @@
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_mpls.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
+#include <net/bareudp.h>
#include "en.h"
#include "en_rep.h"
+#include "en/rep/tc.h"
+#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "esw/chains.h"
@@ -89,6 +94,7 @@ enum {
MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
+ MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
};
#define MLX5E_TC_MAX_SPLITS 1
@@ -122,6 +128,11 @@ struct mlx5e_tc_flow {
u64 cookie;
unsigned long flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+ /* flows sharing the same reformat object - currently mpls decap */
+ struct list_head l3_to_l2_reformat;
+ struct mlx5e_decap_entry *decap_reformat;
+
/* Flow can be associated with multiple encap IDs.
* The number of encaps is bounded by the number of supported
* destinations.
@@ -153,40 +164,12 @@ struct mlx5e_tc_flow_parse_attr {
struct mlx5_flow_spec spec;
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct ethhdr eth;
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
-struct tunnel_match_key {
- struct flow_dissector_key_control enc_control;
- struct flow_dissector_key_keyid enc_key_id;
- struct flow_dissector_key_ports enc_tp;
- struct flow_dissector_key_ip enc_ip;
- union {
- struct flow_dissector_key_ipv4_addrs enc_ipv4;
- struct flow_dissector_key_ipv6_addrs enc_ipv6;
- };
-
- int filter_ifindex;
-};
-
-struct tunnel_match_enc_opts {
- struct flow_dissector_key_enc_opts key;
- struct flow_dissector_key_enc_opts mask;
-};
-
-/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
- * Upper TUNNEL_INFO_BITS for general tunnel info.
- * Lower ENC_OPTS_BITS bits for enc_opts.
- */
-#define TUNNEL_INFO_BITS 6
-#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
-#define ENC_OPTS_BITS 2
-#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
-#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
-#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
-
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[CHAIN_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
@@ -1149,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1324,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
+ if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
+ err = mlx5e_attach_decap(priv, flow, extack);
+ if (err)
+ return err;
+ }
+
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
int mirred_ifindex;
@@ -1433,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(attr->counter_dev, attr->counter);
+
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ mlx5e_detach_decap(priv, flow);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1709,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
kfree_rcu(e, rcu);
}
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -1721,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
mlx5e_encap_dealloc(priv, e);
}
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
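/* Why refcount_dec_and_mutex_lock() above: the common put path stays
 * lock-free, and the mutex is taken only on the 1->0 transition, so the
 * last holder unhashes the entry atomically with respect to decap table
 * lookups. Sketch under hypothetical decap_tbl/decap_entry/free_after_rcu()
 * names, mirroring mlx5e_decap_put():
 */
static void decap_put_sketch(struct decap_tbl *tbl, struct decap_entry *e)
{
	if (!refcount_dec_and_mutex_lock(&e->refcnt, &tbl->lock))
		return;			/* other holders remain */
	hash_del_rcu(&e->node);		/* unpublish while locked */
	mutex_unlock(&tbl->lock);
	free_after_rcu(e);		/* free once RCU readers are done */
}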
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow, int out_index)
{
@@ -1744,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
mlx5e_encap_dealloc(priv, e);
}
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -2015,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return err;
}
- flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ /* With mpls over udp we decapsulate using a packet reformat
+ * object
+ */
+ if (!netif_is_bareudp(filter_dev))
+ flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
}
if (!needs_mapping && !sets_mapping)
@@ -2098,6 +2145,20 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
return 0;
}
+static bool skip_key_basic(struct net_device *filter_dev,
+ struct flow_cls_offload *f)
+{
+ /* When doing mpls over udp decap, the user needs to provide
+ * MPLS_UC as the protocol in order to be able to match on mpls
+ * label fields. However, the actual ethertype is IP so we want to
+ * avoid matching on it; otherwise the match would fail.
+ */
+ if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
+ return true;
+
+ return false;
+}
+
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -2142,7 +2203,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
BIT(FLOW_DISSECTOR_KEY_IP) |
BIT(FLOW_DISSECTOR_KEY_CT) |
BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
- BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
+ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT(FLOW_DISSECTOR_KEY_MPLS))) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
dissector->used_keys);
@@ -2172,7 +2234,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
if (err)
return err;
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
+ !skip_key_basic(filter_dev, f)) {
struct flow_match_basic match;
flow_rule_match_basic(rule, &match);
@@ -2837,10 +2900,12 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
static const struct pedit_headers zero_masks = {};
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
- const struct flow_action_entry *act, int namespace,
- struct pedit_headers_action *hdrs,
- struct netlink_ext_ack *extack)
+static int
+parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
{
u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
int err = -EOPNOTSUPP;
@@ -2876,6 +2941,46 @@ out_err:
return err;
}
+static int
+parse_pedit_to_reformat(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 mask, val, offset;
+ u32 *p;
+
+ if (act->id != FLOW_ACTION_MANGLE)
+ return -EOPNOTSUPP;
+
+ if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
+ NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
+ return -EOPNOTSUPP;
+ }
+
+ mask = ~act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+ p = (u32 *)&parse_attr->eth;
+ *(p + (offset >> 2)) |= (val & mask);
+
+ return 0;
+}
+
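/* Worked example of the word-wise OR in parse_pedit_to_reformat() above
 * (illustrative; the values are arbitrary and shown in host order): a
 * pedit that rewrites the destination MAC arrives as two 32-bit mangles,
 * one at offset 0 and one at offset 4, each with mask bits set over the
 * untouched bytes. The ~mask/val combination lands in parse_attr->eth
 * like so:
 */
static void pedit_eth_example(struct ethhdr *eth)
{
	u32 *p = (u32 *)eth;

	/* offset 0: h_dest[0..3]; act->mangle.mask == 0, all bytes written */
	p[0 >> 2] |= 0xaabbccdd & ~0x00000000;
	/* offset 4: h_dest[4..5]; the low two bytes are masked out (kept) */
	p[4 >> 2] |= 0xeeff0000 & ~0x0000ffff;
}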
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct pedit_headers_action *hdrs,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
+ return parse_pedit_to_reformat(priv, act, parse_attr, extack);
+
+ return parse_pedit_to_modify_hdr(priv, act, namespace,
+ parse_attr, hdrs, extack);
+}
+
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct pedit_headers_action *hdrs,
@@ -3134,7 +3239,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
return -EOPNOTSUPP;
}
- err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL);
+ err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
return err;
@@ -3200,7 +3305,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
case FLOW_ACTION_MANGLE:
case FLOW_ACTION_ADD:
err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
- hdrs, extack);
+ parse_attr, hdrs, NULL, extack);
if (err)
return err;
@@ -3294,12 +3399,22 @@ static inline int cmp_encap_info(struct encap_key *a,
a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
}
+static inline int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
static inline int hash_encap_info(struct encap_key *key)
{
return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
key->tc_tunnel->tunnel_type);
}
+static inline int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
@@ -3314,13 +3429,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
same_hw_devs(priv, peer_priv));
}
-
-
bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
return refcount_inc_not_zero(&e->refcnt);
}
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
uintptr_t hash_key)
@@ -3341,6 +3459,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
return NULL;
}
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
{
size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
@@ -3486,6 +3622,84 @@ out_err_init:
return err;
}
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err;
+
+ parse_attr = attr->parse_attr;
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = parse_attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+ sizeof(parse_attr->eth),
+ &parse_attr->eth,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}
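/* mlx5e_attach_decap() above publishes the entry to decap_tbl *before*
 * the firmware call and signals res_ready afterwards, so concurrent flows
 * that find a half-built entry wait instead of allocating a duplicate
 * reformat object. Condensed creator-side sketch (hypothetical
 * decap_tbl/decap_entry/fw_alloc_reformat() names; enters and leaves with
 * tbl->lock held, matching the real code):
 */
static int decap_create_locked(struct decap_tbl *tbl, struct decap_entry *e,
			       uintptr_t hash_key)
{
	int err;

	/* publish before the slow call so others can wait on res_ready */
	hash_add_rcu(tbl->ht, &e->node, hash_key);
	mutex_unlock(&tbl->lock);

	err = fw_alloc_reformat(e);	/* slow firmware call, unlocked */
	e->compl_result = err;

	mutex_lock(&tbl->lock);
	complete_all(&e->res_ready);	/* wake any waiters, success or not */
	return err;
}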
+
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act,
struct mlx5_esw_flow_attr *attr,
@@ -3697,7 +3911,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ struct net_device *filter_dev)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3711,6 +3926,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
bool encap = false, decap = false;
u32 action = attr->action;
int err, i, if_count = 0;
+ bool mpls_push = false;
if (!flow_action_has_entries(flow_action))
return -EINVAL;
@@ -3725,15 +3941,48 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
+ case FLOW_ACTION_MPLS_PUSH:
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ reformat_l2_to_l3_tunnel) ||
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls push is supported only for mpls_uc protocol");
+ return -EOPNOTSUPP;
+ }
+ mpls_push = true;
+ break;
+ case FLOW_ACTION_MPLS_POP:
+ /* we only support mpls pop if it is the first action
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if (i) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls pop supported only as first action");
+ return -EOPNOTSUPP;
+ }
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls pop supported only on bareudp devices");
+ return -EOPNOTSUPP;
+ }
+
+ parse_attr->eth.h_proto = act->mpls_pop.proto;
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(flow, L3_TO_L2_DECAP);
+ break;
case FLOW_ACTION_MANGLE:
case FLOW_ACTION_ADD:
err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
- hdrs, extack);
+ parse_attr, hdrs, flow, extack);
if (err)
return err;
- action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->split_count = attr->out_count;
+ if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->split_count = attr->out_count;
+ }
break;
case FLOW_ACTION_CSUM:
if (csum_offload_supported(priv, action,
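/* Condensed view of the mpls constraints enforced by this parser
 * (illustrative; mirrors the hunks above): pop must be the first action
 * and the classifier must live on a bareudp device; push requires the
 * reformat_l2_to_l3_tunnel FDB capability, the ETH_P_MPLS_UC protocol,
 * and (checked at redirect time) a bareudp egress device. The helper
 * name is hypothetical; netif_is_bareudp() is added by this patch.
 */
static bool mpls_pop_allowed(int act_index, const struct net_device *filter_dev)
{
	return act_index == 0 && netif_is_bareudp(filter_dev);
}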
@@ -3755,6 +4004,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EINVAL;
}
+ if (mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "mpls is supported only through a bareudp device");
+ return -EOPNOTSUPP;
+ }
+
if (ft_flow && out_dev == priv->netdev) {
/* Ignore forward to self rules generated
* by adding both mlx5 devs to the flow table
@@ -4085,6 +4340,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
INIT_LIST_HEAD(&flow->encaps[out_index].list);
INIT_LIST_HEAD(&flow->mod_hdr);
INIT_LIST_HEAD(&flow->hairpin);
+ INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
refcount_set(&flow->refcnt, 1);
init_completion(&flow->init_done);
@@ -4154,7 +4410,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
if (err)
goto err_free;
@@ -4806,148 +5062,35 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
mutex_unlock(&rpriv->unready_flows_lock);
}
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
- struct mlx5e_tc_update_priv *tc_priv,
- u32 tunnel_id)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct tunnel_match_enc_opts enc_opts = {};
- struct mlx5_rep_uplink_priv *uplink_priv;
- struct mlx5e_rep_priv *uplink_rpriv;
- struct metadata_dst *tun_dst;
- struct tunnel_match_key key;
- u32 tun_id, enc_opts_id;
- struct net_device *dev;
- int err;
-
- enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
- tun_id = tunnel_id >> ENC_OPTS_BITS;
-
- if (!tun_id)
- return true;
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- uplink_priv = &uplink_rpriv->uplink_priv;
-
- err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
- if (err) {
- WARN_ON_ONCE(true);
- netdev_dbg(priv->netdev,
- "Couldn't find tunnel for tun_id: %d, err: %d\n",
- tun_id, err);
- return false;
- }
-
- if (enc_opts_id) {
- err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
- enc_opts_id, &enc_opts);
- if (err) {
- netdev_dbg(priv->netdev,
- "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
- enc_opts_id, err);
- return false;
- }
- }
-
- tun_dst = tun_rx_dst(enc_opts.key.len);
- if (!tun_dst) {
- WARN_ON_ONCE(true);
- return false;
- }
-
- ip_tunnel_key_init(&tun_dst->u.tun_info.key,
- key.enc_ipv4.src, key.enc_ipv4.dst,
- key.enc_ip.tos, key.enc_ip.ttl,
- 0, /* label */
- key.enc_tp.src, key.enc_tp.dst,
- key32_to_tunnel_id(key.enc_key_id.keyid),
- TUNNEL_KEY);
-
- if (enc_opts.key.len)
- ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
- enc_opts.key.data,
- enc_opts.key.len,
- enc_opts.key.dst_opt_type);
-
- skb_dst_set(skb, (struct dst_entry *)tun_dst);
- dev = dev_get_by_index(&init_net, key.filter_ifindex);
- if (!dev) {
- netdev_dbg(priv->netdev,
- "Couldn't find tunnel device with ifindex: %d\n",
- key.filter_ifindex);
- return false;
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower,
+ unsigned long flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
}
-
- /* Set tun_dev so we do dev_put() after datapath */
- tc_priv->tun_dev = dev;
-
- skb->dev = dev;
-
- return true;
}
-#endif /* CONFIG_NET_TC_SKB_EXT */
-bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
- struct sk_buff *skb,
- struct mlx5e_tc_update_priv *tc_priv)
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
{
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
- struct mlx5_rep_uplink_priv *uplink_priv;
- struct mlx5e_rep_priv *uplink_rpriv;
- struct tc_skb_ext *tc_skb_ext;
- struct mlx5_eswitch *esw;
- struct mlx5e_priv *priv;
- int tunnel_moffset;
- int err;
-
- reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
- if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
- reg_c0 = 0;
- reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
- if (!reg_c0)
- return true;
-
- priv = netdev_priv(skb->dev);
- esw = priv->mdev->priv.eswitch;
-
- err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
- if (err) {
- netdev_dbg(priv->netdev,
- "Couldn't find chain for chain tag: %d, err: %d\n",
- reg_c0, err);
- return false;
- }
-
- if (chain) {
- tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
- if (!tc_skb_ext) {
- WARN_ON(1);
- return false;
- }
-
- tc_skb_ext->chain = chain;
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
- tuple_id = reg_c1 & TUPLE_ID_MAX;
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- uplink_priv = &uplink_rpriv->uplink_priv;
- if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
- return false;
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
+ default:
+ return -EOPNOTSUPP;
}
-
- tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
- tunnel_id = reg_c1 >> (8 * tunnel_moffset);
- return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
-#endif /* CONFIG_NET_TC_SKB_EXT */
-
- return true;
-}
-
-void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
-{
- if (tc_priv->tun_dev)
- dev_put(tc_priv->tun_dev);
}
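/* Hypothetical caller sketch (not part of this diff): in NIC mode the
 * callback above would be wired through the ndo_setup_tc TC_SETUP_BLOCK
 * path with flow_block_cb_setup_simple(), much like the rep callbacks
 * removed earlier in this patch:
 */
static LIST_HEAD(nic_block_cb_list);

static int nic_setup_tc_block(struct net_device *dev,
			      struct flow_block_offload *f)
{
	struct mlx5e_priv *priv = netdev_priv(dev);

	return flow_block_cb_setup_simple(f, &nic_block_cb_list,
					  mlx5e_setup_tc_block_cb,
					  priv, priv, true);
}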
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index abdcfa4c4e0e..037aa73bf9ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -34,11 +34,41 @@
#define __MLX5_EN_TC_H__
#include <net/pkt_cls.h>
+#include "en.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
#ifdef CONFIG_MLX5_ESWITCH
+struct tunnel_match_key {
+ struct flow_dissector_key_control enc_control;
+ struct flow_dissector_key_keyid enc_key_id;
+ struct flow_dissector_key_ports enc_tp;
+ struct flow_dissector_key_ip enc_ip;
+ union {
+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
+ };
+
+ int filter_ifindex;
+};
+
+struct tunnel_match_enc_opts {
+ struct flow_dissector_key_enc_opts key;
+ struct flow_dissector_key_enc_opts mask;
+};
+
+/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
+ * Upper TUNNEL_INFO_BITS for general tunnel info.
+ * Lower ENC_OPTS_BITS bits for enc_opts.
+ */
+#define TUNNEL_INFO_BITS 6
+#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
+#define ENC_OPTS_BITS 2
+#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
+#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
+#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
+
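/* Illustrative packing helper for the layout above; the unpack direction
 * (enc_opts_id = id & ENC_OPTS_BITS_MASK, tun_id = id >> ENC_OPTS_BITS)
 * matches the restore path that consumes these bits. The helper name is
 * hypothetical.
 */
static inline u32 tunnel_id_pack(u32 tun_id, u32 enc_opts_id)
{
	return (tun_id << ENC_OPTS_BITS) | (enc_opts_id & ENC_OPTS_BITS_MASK);
}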
enum {
MLX5E_TC_FLAG_INGRESS_BIT,
MLX5E_TC_FLAG_EGRESS_BIT,
@@ -50,9 +80,6 @@ enum {
#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
-int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
-void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
-
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
@@ -119,11 +146,6 @@ struct mlx5e_tc_update_priv {
struct net_device *tun_dev;
};
-bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
- struct mlx5e_tc_update_priv *tc_priv);
-
-void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
-
struct mlx5e_tc_mod_hdr_acts {
int num_actions;
int max_actions;
@@ -148,6 +170,22 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
struct mlx5e_tc_flow;
u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+#endif /* CONFIG_MLX5_CLS_ACT */
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
@@ -156,6 +194,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv,
{
return 0;
}
+
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
#endif
#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
index f8c4239846ea..7679ac359e31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
@@ -6,6 +6,8 @@
#include "eswitch.h"
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
bool
mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw);
bool
@@ -46,4 +48,21 @@ void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
int
mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline struct mlx5_flow_table *
+mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
+ u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
+ u32 level) {}
+
+static inline struct mlx5_flow_table *
+mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; }
+static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
#endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c5eb4e7754a9..ac79b7c9aeb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
hash_init(esw->offloads.encap_tbl);
mutex_init(&esw->offloads.mod_hdr.lock);
hash_init(esw->offloads.mod_hdr.hlist);
+ mutex_init(&esw->offloads.decap_tbl_lock);
+ hash_init(esw->offloads.decap_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
mutex_init(&esw->state_lock);
mutex_init(&esw->mode_lock);
@@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->state_lock);
mutex_destroy(&esw->offloads.mod_hdr.lock);
mutex_destroy(&esw->offloads.encap_tbl_lock);
+ mutex_destroy(&esw->offloads.decap_tbl_lock);
kfree(esw->vports);
kfree(esw);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 4a1c6c78bb14..ccbbea3e0505 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -209,6 +209,8 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
+ struct mutex decap_tbl_lock; /* protects decap_tbl */
+ DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
@@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr {
struct mlx5_flow_table *fdb;
struct mlx5_flow_table *dest_ft;
struct mlx5_ct_attr ct_attr;
+ struct mlx5_pkt_reformat *decap_pkt_reformat;
struct mlx5e_tc_flow_parse_attr *parse_attr;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 57ac2ef52e80..554fc64d8ef6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
}
}
+
+ if (attr->decap_pkt_reformat)
+ flow_act.pkt_reformat = attr->decap_pkt_reformat;
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[i].counter_id = mlx5_fc_id(attr->counter);
@@ -1727,7 +1731,9 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
{
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
mlx5e_tc_clean_fdb_peer_flows(esw);
+#endif
esw_del_fdb_peer_miss_rules(esw);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index 8809a65ecefb..e042e0924079 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -144,11 +144,11 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
int reformat_type)
{
- /* the default is error for unknown (non VXLAN/GRE tunnel types) */
int err = -EOPNOTSUPP;
mutex_lock(&tun_entropy->lock);
- if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+ if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
+ reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
tun_entropy->enabled) {
/* in case entropy calculation is enabled for all tunneling
* types, it is ok for VXLAN, so approve.
diff --git a/include/net/bareudp.h b/include/net/bareudp.h
index cb03f6f15956..dc65a0d71d9b 100644
--- a/include/net/bareudp.h
+++ b/include/net/bareudp.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/skbuff.h>
+#include <net/rtnetlink.h>
struct bareudp_conf {
__be16 ethertype;
@@ -17,4 +18,10 @@ struct net_device *bareudp_dev_create(struct net *net, const char *name,
u8 name_assign_type,
struct bareudp_conf *info);
+static inline bool netif_is_bareudp(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, "bareudp");
+}
+
#endif