aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst46
-rw-r--r--Documentation/networking/devlink/devlink-port.rst122
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c211
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c30
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
-rw-r--r--include/linux/mlx5/vport.h2
-rw-r--r--include/net/devlink.h39
-rw-r--r--include/uapi/linux/devlink.h13
-rw-r--r--net/core/devlink.c200
14 files changed, 686 insertions, 49 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index e8fa7ac9e6b1..6969652f593c 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -351,42 +351,26 @@ driver.
MAC address setup
-----------------
-mlx5 driver provides mechanism to setup the MAC address of the PCI VF/SF.
+mlx5 driver support devlink port function attr mechanism to setup MAC
+address. (refer to Documentation/networking/devlink/devlink-port.rst)
-The configured MAC address of the PCI VF/SF will be used by netdevice and rdma
-device created for the PCI VF/SF.
+RoCE capability setup
+---------------------
+Not all mlx5 PCI devices/SFs require RoCE capability.
-- Get the MAC address of the VF identified by its unique devlink port index::
+When RoCE capability is disabled, it saves 1 Mbytes worth of system memory per
+PCI devices/SF.
- $ devlink port show pci/0000:06:00.0/2
- pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
- function:
- hw_addr 00:00:00:00:00:00
-
-- Set the MAC address of the VF identified by its unique devlink port index::
-
- $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55
-
- $ devlink port show pci/0000:06:00.0/2
- pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
- function:
- hw_addr 00:11:22:33:44:55
-
-- Get the MAC address of the SF identified by its unique devlink port index::
-
- $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
- function:
- hw_addr 00:00:00:00:00:00
-
-- Set the MAC address of the SF identified by its unique devlink port index::
+mlx5 driver support devlink port function attr mechanism to setup RoCE
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
- $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88
+migratable capability setup
+---------------------------
+User who wants mlx5 PCI VFs to be able to perform live migration need to
+explicitly enable the VF migratable capability.
- $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
- function:
- hw_addr 00:00:00:00:88:88
+mlx5 driver support devlink port function attr mechanism to setup migratable
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
SF state setup
--------------
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 98557c2ab1c1..3da590953ce8 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -110,7 +110,7 @@ devlink ports for both the controllers.
Function configuration
======================
-A user can configure the function attribute before enumerating the PCI
+Users can configure one or more function attributes before enumerating the PCI
function. Usually it means, user should configure function attribute
before a bus specific device for the function is created. However, when
SRIOV is enabled, virtual function devices are created on the PCI bus.
@@ -119,9 +119,127 @@ function device to the driver. For subfunctions, this means user should
configure port function attribute before activating the port function.
A user may set the hardware address of the function using
-'devlink port function set hw_addr' command. For Ethernet port function
+`devlink port function set hw_addr` command. For Ethernet port function
this means a MAC address.
+Users may also set the RoCE capability of the function using
+`devlink port function set roce` command.
+
+Users may also set the function as migratable using
+'devlink port function set migratable' command.
+
+Function attributes
+===================
+
+MAC address setup
+-----------------
+The configured MAC address of the PCI VF/SF will be used by netdevice and rdma
+device created for the PCI VF/SF.
+
+- Get the MAC address of the VF identified by its unique devlink port index::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00
+
+- Set the MAC address of the VF identified by its unique devlink port index::
+
+ $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:11:22:33:44:55
+
+- Get the MAC address of the SF identified by its unique devlink port index::
+
+ $ devlink port show pci/0000:06:00.0/32768
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
+ function:
+ hw_addr 00:00:00:00:00:00
+
+- Set the MAC address of the SF identified by its unique devlink port index::
+
+ $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88
+
+ $ devlink port show pci/0000:06:00.0/32768
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
+ function:
+ hw_addr 00:00:00:00:88:88
+
+RoCE capability setup
+---------------------
+Not all PCI VFs/SFs require RoCE capability.
+
+When RoCE capability is disabled, it saves system memory per PCI VF/SF.
+
+When user disables RoCE capability for a VF/SF, user application cannot send or
+receive any RoCE packets through this VF/SF and RoCE GID table for this PCI
+will be empty.
+
+When RoCE capability is disabled in the device using port function attribute,
+VF/SF driver cannot override it.
+
+- Get RoCE capability of the VF device::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce enable
+
+- Set RoCE capability of the VF device::
+
+ $ devlink port function set pci/0000:06:00.0/2 roce disable
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce disable
+
+migratable capability setup
+---------------------------
+Live migration is the process of transferring a live virtual machine
+from one physical host to another without disrupting its normal
+operation.
+
+User who want PCI VFs to be able to perform live migration need to
+explicitly enable the VF migratable capability.
+
+When user enables migratable capability for a VF, and the HV binds the VF to VFIO driver
+with migration support, the user can migrate the VM with this VF from one HV to a
+different one.
+
+However, when migratable capability is enable, device will disable features which cannot
+be migrated. Thus migratable cap can impose limitations on a VF so let the user decide.
+
+Example of LM with migratable function configuration:
+- Get migratable capability of the VF device::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 migratable disable
+
+- Set migratable capability of the VF device::
+
+ $ devlink port function set pci/0000:06:00.0/2 migratable enable
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 migratable enable
+
+- Bind VF to VFIO driver with migration support::
+
+ $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/unbind
+ $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override
+ $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/bind
+
+Attach VF to the VM.
+Start the VM.
+Perform live migration.
+
Subfunction
============
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 751bc4a9edcf..ddb197970c22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -314,6 +314,10 @@ static const struct devlink_ops mlx5_devlink_ops = {
.rate_node_new = mlx5_esw_devlink_rate_node_new,
.rate_node_del = mlx5_esw_devlink_rate_node_del,
.rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
+ .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
+ .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
+ .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
+ .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 374e3fbdc2cf..527e4bffda8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -772,6 +772,41 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return 0;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce);
+
+ memset(query_ctx, 0, query_out_sz);
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL_2);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable);
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
u16 vport_num = vport->vport;
@@ -785,6 +820,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
+ err = mlx5_esw_vport_caps_get(esw, vport);
+ if (err)
+ goto err_caps;
+
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
@@ -804,6 +843,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
vport->info.qos, flags);
return 0;
+
+err_caps:
+ esw_vport_cleanup_acl(esw, vport);
+ return err;
}
/* Don't cleanup vport->info, it's needed to restore vport configuration */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 42d9df417e20..5a85a5d32be7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -153,6 +153,8 @@ struct mlx5_vport_info {
u8 qos;
u8 spoofchk: 1;
u8 trusted: 1;
+ u8 roce_enabled: 1;
+ u8 mig_enabled: 1;
};
/* Vport context events */
@@ -508,7 +510,14 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
-
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9b6fbb19c22a..c6a14202c62c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3889,7 +3889,7 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num,
if (!query_ctx)
return -ENOMEM;
- err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx);
if (err)
goto out_free;
@@ -4022,3 +4022,212 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
}
+
+static struct mlx5_vport *
+mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw)
+{
+ u16 vport_num;
+
+ if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return mlx5_eswitch_get_vport(esw, vport_num);
+}
+
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ if (!MLX5_CAP_GEN(esw->dev, migration)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+ return err;
+ }
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (vport->enabled) {
+ *is_enabled = vport->info.mig_enabled;
+ err = 0;
+ }
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ void *query_ctx;
+ void *hca_caps;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ if (!MLX5_CAP_GEN(esw->dev, migration)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+ return err;
+ }
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+ goto out;
+ }
+
+ if (vport->info.mig_enabled == enable) {
+ err = 0;
+ goto out;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL_2);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+ goto out_free;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
+
+ err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap");
+ goto out_free;
+ }
+
+ vport->info.mig_enabled = enable;
+
+out_free:
+ kfree(query_ctx);
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (vport->enabled) {
+ *is_enabled = vport->info.roce_enabled;
+ err = 0;
+ }
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+ void *query_ctx;
+ void *hca_caps;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+ vport_num = vport->vport;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+ goto out;
+ }
+
+ if (vport->info.roce_enabled == enable) {
+ err = 0;
+ goto out;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
+ MLX5_CAP_GENERAL);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+ goto out_free;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);
+
+ err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap");
+ goto out_free;
+ }
+
+ vport->info.roce_enabled = enable;
+
+out_free:
+ kfree(query_ctx);
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a806e3de7b7c..029305a8b80a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -324,7 +324,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id,
+ u16 opmod);
+#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \
+ mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL)
void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 662f1d55e30e..6bde18bcd42f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -4,6 +4,7 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
@@ -101,7 +102,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
goto out;
}
- ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+ ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
if (ret)
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index d5c317325030..ba7e3df22413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1160,14 +1160,40 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod)
{
- u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+ opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, other_function, true);
return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
}
+EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap);
+
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap,
+ u16 function_id, u16 opmod)
+{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_hca_cap;
+ void *set_ctx;
+ int ret;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1);
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap));
+ MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id);
+ MLX5_SET(set_hca_cap_in, set_ctx, other_function, true);
+ ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx);
+
+ kfree(set_ctx);
+ return ret;
+}
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5a4e914e2a6f..2093131483c7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -68,6 +68,7 @@ enum {
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20,
MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25,
};
@@ -1875,7 +1876,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
};
struct mlx5_ifc_cmd_hca_cap_2_bits {
- u8 reserved_at_0[0xa0];
+ u8 reserved_at_0[0x80];
+
+ u8 migratable[0x1];
+ u8 reserved_at_81[0x1f];
u8 max_reformat_insert_size[0x8];
u8 max_reformat_insert_offset[0x8];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aad53cb72f17..7f31432f44c2 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 5f6eca5e4a40..0f376a28b9c4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1452,6 +1452,45 @@ struct devlink_ops {
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
+ * @port_fn_roce_get: Port function's roce get function.
+ *
+ * Query RoCE state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_roce_set: Port function's roce set function.
+ *
+ * Enable/Disable the RoCE state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_set)(struct devlink_port *devlink_port,
+ bool enable, struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_get: Port function's migratable get function.
+ *
+ * Query migratable state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_set: Port function's migratable set function.
+ *
+ * Enable/Disable migratable state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ /**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
* @attrs: attributes of the new port
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 70191d96af89..3782d4219ac9 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -658,11 +658,24 @@ enum devlink_resource_unit {
DEVLINK_RESOURCE_UNIT_ENTRY,
};
+enum devlink_port_fn_attr_cap {
+ DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
+ DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT,
+
+ /* Add new caps above */
+ __DEVLINK_PORT_FN_ATTR_CAPS_MAX,
+};
+
+#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
+#define DEVLINK_PORT_FN_CAP_MIGRATABLE \
+ _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT)
+
enum devlink_port_function_attr {
DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
+ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 907df7124157..ab40ebcb4aea 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
@@ -680,6 +685,87 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_roce_get)
+ return 0;
+
+ err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
@@ -1264,6 +1350,51 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_migratable_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_roce_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -1281,6 +1412,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_caps_fill(ops, port, msg, extack,
+ &msg_updated);
+ if (err)
+ goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
@@ -1632,11 +1767,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
}
}
- if (!ops->port_function_hw_addr_set) {
- NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
- return -EOPNOTSUPP;
- }
-
return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
extack);
}
@@ -1650,12 +1780,52 @@ static int devlink_port_fn_state_set(struct devlink_port *port,
state = nla_get_u8(attr);
ops = port->devlink->ops;
- if (!ops->port_fn_state_set) {
- NL_SET_ERR_MSG_MOD(extack,
- "Function does not support state setting");
+ return ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+ struct nlattr *attr;
+
+ if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+ !ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(port, state, extack);
+ if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+ !ops->port_fn_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ if (!ops->port_fn_migratable_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support migratable function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "migratable function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ return 0;
}
static int devlink_port_function_set(struct devlink_port *port,
@@ -1672,12 +1842,24 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
+ err = devlink_port_function_validate(port, tb, extack);
+ if (err)
+ return err;
+
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.