diff options
author | Jakub Kicinski <[email protected]> | 2023-02-15 19:24:52 -0800 |
---|---|---|
committer | Jakub Kicinski <[email protected]> | 2023-02-15 19:24:52 -0800 |
commit | 0f19f514dea1837caff0b40c3e966ecedc95fec8 (patch) | |
tree | 4dcbcb0060a7054a040f34b36a002d74ca5b08e4 | |
parent | 388a9c907a51489bf566165c72e4e8aa4d62ab49 (diff) | |
parent | 72ed5d5624af384eaf74d84915810d54486a75e2 (diff) |
Merge tag 'mlx5-updates-2023-02-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:
====================
mlx5-updates-2023-02-10
1) From Roi and Mark: MultiPort eswitch support
MultiPort E-Switch builds on newer hardware's capabilities and introduces
a mode where a single E-Switch is used and all the vports and physical
ports on the NIC are connected to it.
The new mode will allow in the future a decrease in the memory used by the
driver and advanced features that aren't possible today.
This represents a big change in the current E-Switch implementation in mlx5.
Currently, by default, each E-Switch manager manages its E-Switch.
Steering rules in each E-Switch can only forward traffic to the native
physical port associated with that E-Switch. While there are ways to target
non-native physical ports, for example using a bond or via special TC
rules, none of these ways allows a user to configure the driver
to operate in such a mode by default, nor can the driver decide
to move to this mode by default, as it is user-configuration-driven right now.
While MultiPort E-Switch single FDB mode is the preferred mode, older
generations of ConnectX hardware couldn't support this mode so it was never
implemented. Now that there is capable hardware present, start the
transition to having this mode by default.
Introduce a devlink parameter to control MultiPort E-Switch single FDB mode.
This will allow users to select this mode on their system right now
and in the future will allow the driver to move to this mode by default.
2) From Jiri: Improvements and fixes for mlx5 netdev's devlink logic
2.1) Cleanups related to mlx5's devlink port logic
2.2) Move devlink port registration to be done before netdev alloc
2.3) Create auxdev devlink instance in the same ns as parent devlink
2.4) Suspend auxiliary devices only in case of PCI device suspend
* tag 'mlx5-updates-2023-02-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
net/mlx5: Suspend auxiliary devices only in case of PCI device suspend
net/mlx5: Remove "recovery" arg from mlx5_load_one() function
net/mlx5e: Create auxdev devlink instance in the same ns as parent devlink
net/mlx5e: Move devlink port registration to be done before netdev alloc
net/mlx5e: Move dl_port to struct mlx5e_dev
net/mlx5e: Replace usage of mlx5e_devlink_get_dl_port() by netdev->devlink_port
net/mlx5e: Pass mdev to mlx5e_devlink_port_register()
net/mlx5: Remove outdated comment
net/mlx5e: TC, Remove redundant parse_attr argument
net/mlx5e: Use a simpler comparison for uplink rep
net/mlx5: Lag, Add single RDMA device in multiport mode
net/mlx5: Lag, set different uplink vport metadata in multiport eswitch mode
net/mlx5: E-Switch, rename bond update function to be reused
net/mlx5e: TC, Add peer flow in mpesw mode
net/mlx5: Lag, Control MultiPort E-Switch single FDB mode
====================
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
32 files changed, 334 insertions, 182 deletions
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 29ad304e6fba..3321117cf605 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -54,6 +54,24 @@ parameters. - Control the number of large groups (size > 1) in the FDB table. * The default value is 15, and the range is between 1 and 1024. + * - ``esw_multiport`` + - Boolean + - runtime + - Control MultiPort E-Switch shared fdb mode. + + An experimental mode where a single E-Switch is used and all the vports + and physical ports on the NIC are connected to it. + + An example is to send traffic from a VF that is created on PF0 to an + uplink that is natively associated with the uplink of PF1 + + Note: Future devices, ConnectX-8 and onward, will eventually have this + as the default to allow forwarding between all NIC ports in a single + E-switch environment and the dual E-switch mode will likely get + deprecated. + + Default: disabled + The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 52821485371a..ddcfc116b19a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -37,6 +37,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) const struct mlx5_ib_profile *profile; struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + int second_uplink = false; u32 peer_num_ports; int vport_index; int ret; @@ -47,17 +48,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) peer_dev = mlx5_lag_get_peer_mdev(dev); peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); if (mlx5_lag_is_master(dev)) { - /* Only 1 ib port is the representor for both uplinks */ - num_ports += peer_num_ports - 1; + if (mlx5_lag_is_mpesw(dev)) + num_ports += peer_num_ports; + else + num_ports += peer_num_ports - 1; + } else { - if 
(rep->vport == MLX5_VPORT_UPLINK) - return 0; + if (rep->vport == MLX5_VPORT_UPLINK) { + if (!mlx5_lag_is_mpesw(dev)) + return 0; + second_uplink = true; + } + vport_index += peer_num_ports; dev = peer_dev; } } - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink) profile = &raw_eth_profile; else return mlx5_ib_set_vport_rep(dev, rep, vport_index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 17ae9b4ec794..445fe30c3d0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -377,10 +377,6 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) /* Pay attention that this is not PCI driver that * mlx5_core_dev is connected, but auxiliary driver. - * - * Here we can race of module unload with devlink - * reload, but we don't need to take extra lock because - * we are holding global mlx5_intf_mutex. */ if (!adev->dev.driver) continue; @@ -400,7 +396,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) return ret; } -void mlx5_detach_device(struct mlx5_core_dev *dev) +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend) { struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; @@ -429,7 +425,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev) adrv = to_auxiliary_drv(adev->dev.driver); - if (adrv->suspend) { + if (adrv->suspend && suspend) { adrv->suspend(adev, pm); continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index b742e04deec1..c5d2fdcabd56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -104,7 +105,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink 
*devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); @@ -167,7 +168,7 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) @@ -437,6 +438,53 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); + return -EOPNOTSUPP; + } + + if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must be in switchdev mode"); + return -EBUSY; + } + + return 0; +} + #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -455,6 +503,12 @@ static const struct devlink_param mlx5_devlink_params[] = { 
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, + mlx5_devlink_esw_multiport_validate), #endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index b561107e0df1..212b12424146 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -11,6 +11,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 125c7cb7d839..88460b7796e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -975,6 +975,7 @@ struct mlx5e_priv { struct mlx5e_dev { struct mlx5e_priv *priv; + struct devlink_port dl_port; }; struct mlx5e_rx_handlers { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 03ad3b61dfc7..c6b6e290fd79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -7,12 +7,14 @@ static const struct devlink_ops mlx5e_devlink_ops = { }; -struct mlx5e_dev *mlx5e_create_devlink(struct device *dev) +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev) { struct mlx5e_dev *mlx5e_dev; struct devlink *devlink; - devlink = devlink_alloc(&mlx5e_devlink_ops, sizeof(*mlx5e_dev), dev); + devlink 
= devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev), + devlink_net(priv_to_devlink(mdev)), dev); if (!devlink) return ERR_PTR(-ENOMEM); devlink_register(devlink); @@ -38,39 +40,35 @@ mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_i } int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, - struct mlx5e_priv *priv) + struct mlx5_core_dev *mdev) { struct devlink *devlink = priv_to_devlink(mlx5e_dev); struct devlink_port_attrs attrs = {}; struct netdev_phys_item_id ppid = {}; - struct devlink_port *dl_port; unsigned int dl_port_index; - if (mlx5_core_is_pf(priv->mdev)) { + if (mlx5_core_is_pf(mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = mlx5_get_dev_index(priv->mdev); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) { - mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); + attrs.phys.port_number = mlx5_get_dev_index(mdev); + if (MLX5_ESWITCH_MANAGER(mdev)) { + mlx5e_devlink_get_port_parent_id(mdev, &ppid); memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); attrs.switch_id.id_len = ppid.id_len; } - dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, MLX5_VPORT_UPLINK); } else { attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; - dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, 0); } - dl_port = mlx5e_devlink_get_dl_port(priv); - memset(dl_port, 0, sizeof(*dl_port)); - devlink_port_attrs_set(dl_port, &attrs); + devlink_port_attrs_set(&mlx5e_dev->dl_port, &attrs); - return devlink_port_register(devlink, dl_port, dl_port_index); + return devlink_port_register(devlink, &mlx5e_dev->dl_port, + dl_port_index); } -void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); - - devlink_port_unregister(dl_port); + 
devlink_port_unregister(&mlx5e_dev->dl_port); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index 19b1d8e9634e..d5ec4461f300 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -7,16 +7,11 @@ #include <net/devlink.h> #include "en.h" -struct mlx5e_dev *mlx5e_create_devlink(struct device *dev); +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev); void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev); int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, - struct mlx5e_priv *priv); -void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); - -static inline struct devlink_port * -mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv) -{ - return &priv->mdev->mlx5e_res.dl_port; -} + struct mlx5_core_dev *mdev); +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index b6f5c1bcdbcd..016a61c52c45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -120,8 +120,8 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, priv = netdev_priv(netdev); rpriv = priv->ppriv; - err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, - mdata->metadata_reg_c_0); + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); if (err) goto ingress_err; @@ -167,7 +167,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, /* Reset bond_metadata to zero first then reset all ingress/egress * acls and rx rules of unslave representor's vport */ - mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); + mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, 0); 
mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); mlx5e_rep_bond_update(priv, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 95edab4a1732..c462fe76495b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -736,10 +736,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops, + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 41e356d9d785..34666e2b3871 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -594,10 +594,10 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index c095a12346de..07cc65596f89 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -216,7 +216,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct net_device *uplink_dev; struct mlx5e_priv *out_priv; struct mlx5_eswitch *esw; - bool is_uplink_rep; int *ifindexes; int if_count; int err; @@ -231,7 +230,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, parse_state->ifindexes[if_count] = out_dev->ifindex; parse_state->if_count++; - is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev); if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack)) return -EOPNOTSUPP; @@ -275,13 +273,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; - /* If output device is bond master then rules are not explicit - * so we don't attempt to count them. - */ - if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && - MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) - attr->lag.count = true; - esw_attr->out_count++; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 83bb0811e774..00a04fdd756f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -93,11 +93,11 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, else return -EOPNOTSUPP; - if (!(mlx5e_eswitch_rep(*out_dev) && - mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + if (!mlx5e_eswitch_uplink_rep(*out_dev)) return -EOPNOTSUPP; - if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev && + !mlx5_lag_is_mpesw(priv->mdev)) return -EOPNOTSUPP; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec81d935262f..53feb0529943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5898,17 +5898,24 @@ static int mlx5e_probe(struct auxiliary_device *adev, struct mlx5e_priv *priv; int err; - mlx5e_dev = mlx5e_create_devlink(&adev->dev); + mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); if (IS_ERR(mlx5e_dev)) return PTR_ERR(mlx5e_dev); auxiliary_set_drvdata(adev, mlx5e_dev); + err = mlx5e_devlink_port_register(mlx5e_dev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_devlink_unregister; + } + netdev = mlx5e_create_netdev(mdev, profile); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); err = -ENOMEM; - goto err_devlink_unregister; + goto err_devlink_port_unregister; } + SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port); mlx5e_build_nic_netdev(netdev); @@ -5921,16 +5928,10 @@ static int mlx5e_probe(struct auxiliary_device *adev, priv->dfs_root = debugfs_create_dir("nic", mlx5_debugfs_get_dev_root(priv->mdev)); - err = mlx5e_devlink_port_register(mlx5e_dev, priv); - if (err) { - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - goto err_destroy_netdev; - } - err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_devlink_cleanup; + goto err_destroy_netdev; } err = mlx5e_resume(adev); @@ -5939,7 +5940,6 @@ static int mlx5e_probe(struct auxiliary_device *adev, goto err_profile_cleanup; } - SET_NETDEV_DEVLINK_PORT(netdev, mlx5e_devlink_get_dl_port(priv)); err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); @@ -5955,11 +5955,11 @@ err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); -err_devlink_cleanup: - mlx5e_devlink_port_unregister(priv); err_destroy_netdev: debugfs_remove_recursive(priv->dfs_root); 
mlx5e_destroy_netdev(priv); +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(mlx5e_dev); err_devlink_unregister: mlx5e_destroy_devlink(mlx5e_dev); return err; @@ -5976,9 +5976,9 @@ static void mlx5e_remove(struct auxiliary_device *adev) unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); - mlx5e_devlink_port_unregister(priv); debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); + mlx5e_devlink_port_unregister(mlx5e_dev); mlx5e_destroy_devlink(mlx5e_dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8d29310c7e48..9b9203443085 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1007,8 +1007,23 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) priv->rx_res = NULL; } +static void mlx5e_rep_mpesw_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *uplink_priv = + container_of(work, struct mlx5_rep_uplink_priv, + mpesw_work); + struct mlx5e_rep_priv *rpriv = + container_of(uplink_priv, struct mlx5e_rep_priv, + uplink_priv); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + rep_vport_rx_rule_destroy(priv); + mlx5e_create_rep_vport_rx_rule(priv); +} + static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; int err; mlx5e_create_q_counters(priv); @@ -1018,12 +1033,17 @@ static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) mlx5e_tc_int_port_init_rep_rx(priv); + INIT_WORK(&rpriv->uplink_priv.mpesw_work, mlx5e_rep_mpesw_work); + out: return err; } static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.mpesw_work); mlx5e_tc_int_port_cleanup_rep_rx(priv); mlx5e_cleanup_rep_rx(priv); mlx5e_destroy_q_counters(priv); @@ -1132,6 +1152,19 @@ static int mlx5e_update_rep_rx(struct mlx5e_priv 
*priv) return 0; } +static int mlx5e_rep_event_mpesw(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return NOTIFY_DONE; + + queue_work(priv->wq, &rpriv->uplink_priv.mpesw_work); + + return NOTIFY_OK; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1153,6 +1186,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event if (event == MLX5_DEV_EVENT_PORT_AFFINITY) return mlx5e_rep_tc_event_port_affinity(priv); + else if (event == MLX5_DEV_EVENT_MULTIPORT_ESW) + return mlx5e_rep_event_mpesw(priv); return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 0abe3313c673..dcfad0bf0f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -103,6 +103,8 @@ struct mlx5_rep_uplink_priv { /* tc action stats */ struct mlx5e_tc_act_stats_handle *action_stats_handle; + + struct work_struct mpesw_work; }; struct mlx5e_rep_priv { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a9473a51edc1..b2c7ec4692f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2570,10 +2570,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { - struct mlx5e_priv *priv = netdev_priv(rq->netdev); struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct devlink_port *dl_port; struct sk_buff *skb; u32 cqe_bcnt; u16 trap_id; @@ -2596,8 +2594,8 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe 
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN); - dl_port = mlx5e_devlink_get_dl_port(priv); - mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, + rq->netdev->devlink_port); dev_kfree_skb_any(skb); free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2d06b4412762..9bbd31e304be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2152,9 +2152,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, free_branch_attr(flow, attr->branch_true); free_branch_attr(flow, attr->branch_false); - if (flow->attr->lag.count) - mlx5_lag_del_mpesw_rule(esw->dev); - kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); kfree(flow->attr); @@ -3681,7 +3678,6 @@ out_ok: static bool actions_match_supported_fdb(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { @@ -3730,7 +3726,7 @@ actions_match_supported(struct mlx5e_priv *priv, return false; if (mlx5e_is_eswitch_flow(flow) && - !actions_match_supported_fdb(priv, parse_attr, flow, extack)) + !actions_match_supported_fdb(priv, flow, extack)) return false; return true; @@ -4314,12 +4310,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv, static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - if (same_hw_reps(priv, out_dev) && - MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && - MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) - return true; - - return false; + return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); } bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, @@ -4490,6 +4481,9 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) (is_rep_ingress || act_is_encap)) return true; + if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) + 
return true; + return false; } @@ -4621,7 +4615,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_core_dev *in_mdev) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -4654,26 +4647,17 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - if (flow->attr->lag.count) { - err = mlx5_lag_add_mpesw_rule(esw->dev); - if (err) - goto err_free; - } - err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) - goto err_lag; + goto err_free; add_unready_flow(flow); } return flow; -err_lag: - if (flow->attr->lag.count) - mlx5_lag_del_mpesw_rule(esw->dev); err_free: mlx5e_flow_put(priv, flow); out: @@ -4705,8 +4689,10 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, * So packets redirected to uplink use the same mdev of the * original flow and packets redirected from uplink use the * peer mdev. + * In multiport eswitch it's a special case that we need to + * keep the original mdev. 
*/ - if (attr->in_rep->vport == MLX5_VPORT_UPLINK) + if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev)) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index e8e39fdcda73..f6b10bd3368b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -92,12 +92,6 @@ struct mlx5_flow_attr { u32 exe_aso_type; struct list_head list; struct mlx5e_post_act_handle *post_act_handle; - struct { - /* Indicate whether the parsed flow should be counted for lag mode decision - * making - */ - bool count; - } lag; struct mlx5_flow_attr *branch_true; struct mlx5_flow_attr *branch_false; struct mlx5_flow_attr *jumping_attr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index a994e71e05c1..d55775627a47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -356,8 +356,8 @@ void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, } /* Caller must hold rtnl_lock */ -int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, - u32 metadata) +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata) { struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index 11d3d3978848..c9f8469e9a47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -24,8 +24,8 @@ static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch * /* Eswitch acl ingress external APIs */ int esw_acl_ingress_ofld_setup(struct 
mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, - u32 metadata); +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata); void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8fb09143e9e8..2a98375a0abf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -443,7 +443,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && - mlx5_lag_mpesw_is_activated(esw->dev)) + mlx5_lag_is_mpesw(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 63290da84010..4c2dad9d7cfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -163,11 +163,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else - mlx5_load_one(dev, false); + mlx5_load_one(dev); 
devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); @@ -498,7 +498,7 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) } err = fw_reset->ret; if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); mlx5_load_one_devl_locked(dev, false); } out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1e8bee906c31..f9438d4e43ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -699,7 +699,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) * requests from the kernel. */ mlx5_core_err(dev, "Driver is in error state. Unloading\n"); - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index dbf218cac535..5d331b940f4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -230,7 +230,6 @@ static void mlx5_ldev_free(struct kref *ref) mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); - mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } @@ -276,7 +275,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); - mlx5_lag_mpesw_init(ldev); ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); ldev->buckets = 1; @@ -646,7 +644,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return 0; } -static int mlx5_deactivate_lag(struct mlx5_lag *ldev) +int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = 
ldev->pf[MLX5_LAG_P2].dev; @@ -688,7 +686,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) } #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; @@ -723,7 +721,7 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return true; } -static void mlx5_lag_add_devices(struct mlx5_lag *ldev) +void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; @@ -740,7 +738,7 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev) } } -static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 66013bef9939..bc1f1dd3e283 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -102,6 +102,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, @@ -119,5 +120,8 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); void mlx5_ldev_remove_debugfs(struct dentry *dbg); void mlx5_disable_lag(struct mlx5_lag *ldev); +void mlx5_lag_remove_devices(struct mlx5_lag *ldev); +int mlx5_deactivate_lag(struct mlx5_lag *ldev); +void mlx5_lag_add_devices(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 3799f89ed1a6..0c0ef600f643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -5,39 
+5,121 @@ #include <net/nexthop.h> #include "lag/lag.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "lib/mlx5.h" -static int add_mpesw_rule(struct mlx5_lag *ldev) +static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; - int err; + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = ldev->lag_mpesw.pf_metadata[i]; + if (!pf_metadata) + continue; + mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, 0); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); + mlx5_esw_match_metadata_free(esw, pf_metadata); + ldev->lag_mpesw.pf_metadata[i] = 0; + } +} - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - return 0; +static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i, err; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = mlx5_esw_match_metadata_alloc(esw); + if (!pf_metadata) { + err = -ENOSPC; + goto err_metadata; + } + + ldev->lag_mpesw.pf_metadata[i] = pf_metadata; + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, + pf_metadata); + if (err) + goto err_metadata; + } - if (ldev->mode != MLX5_LAG_MODE_NONE) { - err = -EINVAL; - goto out_err; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); } - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + return 0; + +err_metadata: + mlx5_mpesw_metadata_cleanup(ldev); + return err; +} + +static int enable_mpesw(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + int err; 
+ + if (ldev->mode != MLX5_LAG_MODE_NONE) + return -EINVAL; + + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || + !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || + !mlx5_lag_check_prereq(ldev)) + return -EOPNOTSUPP; + + err = mlx5_mpesw_metadata_set(ldev); + if (err) + return err; + + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true); if (err) { - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - goto out_err; + mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err); + goto err_add_devices; } + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + if (err) + goto err_rescan_drivers; + return 0; -out_err: - atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); +err_rescan_drivers: + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); +err_add_devices: + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_mpesw_metadata_cleanup(ldev); return err; } -static void del_mpesw_rule(struct mlx5_lag *ldev) +static void disable_mpesw(struct mlx5_lag *ldev) { - if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && - ldev->mode == MLX5_LAG_MODE_MPESW) + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + mlx5_mpesw_metadata_cleanup(ldev); mlx5_disable_lag(ldev); + } } static void mlx5_mpesw_work(struct work_struct *work) @@ -45,13 +127,20 @@ static void mlx5_mpesw_work(struct work_struct *work) struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); struct mlx5_lag *ldev = mpesww->lag; + mlx5_dev_list_lock(); mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mpesww->result = 
-EAGAIN; + goto unlock; + } + if (mpesww->op == MLX5_MPESW_OP_ENABLE) - mpesww->result = add_mpesw_rule(ldev); + mpesww->result = enable_mpesw(ldev); else if (mpesww->op == MLX5_MPESW_OP_DISABLE) - del_mpesw_rule(ldev); + disable_mpesw(ldev); +unlock: mutex_unlock(&ldev->lock); - + mlx5_dev_list_unlock(); complete(&mpesww->comp); } @@ -86,12 +175,12 @@ out: return err; } -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev) { mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev) { return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); } @@ -112,19 +201,10 @@ int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; } -bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = mlx5_lag_dev(dev); return ldev && ldev->mode == MLX5_LAG_MODE_MPESW; } - -void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) -{ - atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); -} - -void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) -{ - WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); -} +EXPORT_SYMBOL(mlx5_lag_is_mpesw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index 818f19b5a984..02520f27a033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -9,7 +9,7 @@ struct lag_mpesw { struct work_struct mpesw_work; - atomic_t mpesw_rule_count; + u32 pf_metadata[MLX5_MAX_PORTS]; }; enum mpesw_op { @@ -28,15 +28,8 @@ struct mlx5_mpesw_work_st { int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev, struct netlink_ext_ack *extack); -bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev 
*dev); -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) -void mlx5_lag_mpesw_init(struct mlx5_lag *ldev); -void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev); -#else -static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {} -static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {} -#endif +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); #endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 26e1057845fe..540840e80493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1509,23 +1509,23 @@ out: return err; } -int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +int mlx5_load_one(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); int ret; devl_lock(devlink); - ret = mlx5_load_one_devl_locked(dev, recovery); + ret = mlx5_load_one_devl_locked(dev, false); devl_unlock(devlink); return ret; } -void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev) +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend) { devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); - mlx5_detach_device(dev); + mlx5_detach_device(dev, suspend); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", @@ -1540,12 +1540,12 @@ out: mutex_unlock(&dev->intf_state_mutex); } -void mlx5_unload_one(struct mlx5_core_dev *dev) +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) { struct devlink *devlink = priv_to_devlink(dev); devl_lock(devlink); - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, suspend); devl_unlock(devlink); } @@ -1830,7 +1830,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev 
*pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); - mlx5_unload_one(dev); + mlx5_unload_one(dev, true); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); @@ -1912,8 +1912,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) mlx5_pci_trace(dev, "Enter, loading driver..\n"); - err = mlx5_load_one(dev, false); - + err = mlx5_load_one(dev); if (!err) devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); @@ -1987,7 +1986,7 @@ static void shutdown(struct pci_dev *pdev) set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); err = mlx5_try_fast_unload(dev); if (err) - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); mlx5_pci_disable_device(dev); } @@ -1995,7 +1994,7 @@ static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - mlx5_unload_one(dev); + mlx5_unload_one(dev, true); return 0; } @@ -2004,7 +2003,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev, false); + return mlx5_load_one(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { @@ -2038,7 +2037,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_error_sw_reset(dev); - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); } int mlx5_recover_device(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 029305a8b80a..be0785f83083 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -236,7 +236,7 @@ void mlx5_adev_cleanup(struct mlx5_core_dev *dev); int mlx5_adev_init(struct mlx5_core_dev *dev); int mlx5_attach_device(struct mlx5_core_dev *dev); -void mlx5_detach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct 
mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); @@ -319,9 +319,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); -void mlx5_unload_one(struct mlx5_core_dev *dev); -void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev); -int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); +int mlx5_load_one(struct mlx5_core_dev *dev); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 7b4783ce213e..a7377619ba6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -74,7 +74,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); - mlx5_unload_one(sf_dev->mdev); + mlx5_unload_one(sf_dev->mdev, false); } static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ecd3b5448fe9..a170c8565779 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -217,6 +217,7 @@ struct mlx5_rsc_debug { enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ MLX5_DEV_EVENT_PORT_AFFINITY = 129, + MLX5_DEV_EVENT_MULTIPORT_ESW = 130, }; enum mlx5_port_status { @@ -678,7 +679,6 
@@ struct mlx5e_resources { u32 mkey; struct mlx5_sq_bfreg bfreg; } hw_objs; - struct devlink_port dl_port; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; struct mlx5_crypto_dek_priv *dek_priv; @@ -1161,6 +1161,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); |