From d59b73a66e5e0682442b6d7b4965364e57078b80 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 3 Aug 2022 10:49:23 +0300 Subject: net/mlx5: Avoid false positive lockdep warning by adding lock_class_key Add a lock_class_key per mlx5 device to avoid a false positive "possible circular locking dependency" warning by lockdep, on flows which lock more than one mlx5 device, such as adding SF. kernel log: ====================================================== WARNING: possible circular locking dependency detected 5.19.0-rc8+ #2 Not tainted ------------------------------------------------------ kworker/u20:0/8 is trying to acquire lock: ffff88812dfe0d98 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_init_one+0x2e/0x490 [mlx5_core] but task is already holding lock: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(¬ifier->n_head)->rwsem){++++}-{3:3}: down_write+0x90/0x150 blocking_notifier_chain_register+0x53/0xa0 mlx5_sf_table_init+0x369/0x4a0 [mlx5_core] mlx5_init_one+0x261/0x490 [mlx5_core] probe_one+0x430/0x680 [mlx5_core] local_pci_probe+0xd6/0x170 work_for_cpu_fn+0x4e/0xa0 process_one_work+0x7c2/0x1340 worker_thread+0x6f6/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 -> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: __lock_acquire+0x2fc7/0x6720 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 bus_for_each_drv+0x123/0x1a0 __device_attach+0x1a3/0x460 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 __auxiliary_device_add+0x88/0xc0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); *** DEADLOCK *** 4 locks held by kworker/u20:0/8: #0: ffff888150612938 ((wq_completion)mlx5_events){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888100cafdb8 ((work_completion)(&work->work)#3){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 #3: ffff88813682d0e8 (&dev->mutex){....}-{3:3}, at:__device_attach+0x76/0x460 stack backtrace: CPU: 6 PID: 8 Comm: kworker/u20:0 Not tainted 5.19.0-rc8+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? lock_chain_count+0x20/0x20 ? register_lock_class+0x1880/0x1880 __lock_acquire+0x2fc7/0x6720 ? register_lock_class+0x1880/0x1880 ? register_lock_class+0x1880/0x1880 lock_acquire+0x1c1/0x550 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 __mutex_lock+0x12c/0x14b0 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? _raw_read_unlock+0x1f/0x30 ? mutex_lock_io_nested+0x1320/0x1320 ? __ioremap_caller.constprop.0+0x306/0x490 ? mlx5_sf_dev_probe+0x269/0x370 [mlx5_core] ? iounmap+0x160/0x160 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] ? mlx5_sf_dev_remove+0x130/0x130 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 ? auxiliary_match_id+0xe9/0x140 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 ? driver_allows_async_probing+0x140/0x140 bus_for_each_drv+0x123/0x1a0 ? bus_for_each_dev+0x1a0/0x1a0 ? lockdep_hardirqs_on_prepare+0x286/0x400 ? trace_hardirqs_on+0x2d/0x100 __device_attach+0x1a3/0x460 ? device_driver_attach+0x1e0/0x1e0 ? kobject_uevent_env+0x22d/0xf10 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 ? dev_set_name+0xab/0xe0 ? __fw_devlink_link_to_suppliers+0x260/0x260 ? memset+0x20/0x40 ? lockdep_init_map_type+0x21a/0x7d0 __auxiliary_device_add+0x88/0xc0 ? auxiliary_device_init+0x86/0xa0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] ? mlx5_vhca_event_arm+0x100/0x100 [mlx5_core] ? lock_downgrade+0x6e0/0x6e0 ? lockdep_hardirqs_on_prepare+0x286/0x400 process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/mlx5/driver.h') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..7b7ce602c808 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -779,6 +779,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; + struct lock_class_key lock_key; unsigned long intf_state; struct mlx5_priv priv; struct mlx5_profile profile; -- cgit From 9ca05b0f27de928be121cccf07735819dc9e1ed3 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 29 Aug 2022 12:02:27 +0300 Subject: RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile When the RDMA auxiliary driver probes, it sets its profile based on devlink driverinit value. The latter might not be in sync with FW yet (In case devlink reload is not performed), thus causing a mismatch between RDMA driver and FW. This results in the following FW syndrome when the RDMA driver tries to adjust RoCE state, which fails the probe: "0xC1F678 | modify_nic_vport_context: roce_en set on a vport that doesn't support roce" To prevent this, select the PF profile based on FW RoCE capability instead of relying on devlink driverinit value. To provide backward compatibility of the RoCE disable feature, on older FW's where roce_rw is not set (FW RoCE capability is read-only), keep the current behavior e.g., rely on devlink driverinit value. Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") Reviewed-by: Shay Drory Reviewed-by: Michael Guralnik Reviewed-by: Saeed Mahameed Signed-off-by: Maher Sanalla Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 23 +++++++++++++++++++++-- include/linux/mlx5/driver.h | 19 ++++++++++--------- 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fc94a1b25485..883d7c60143e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, dev->mdev = mdev; dev->num_ports = num_ports; - if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev)) + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) profile = &raw_eth_profile; else profile = &pf_profile; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bec8d6d0b5f6..4870a88cecb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -494,6 +494,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) return err; } +bool mlx5_is_roce_on(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + + if (!err) + return val.vbool; + + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return MLX5_CAP_GEN(dev, roce); +} +EXPORT_SYMBOL(mlx5_is_roce_on); + static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) { void *set_hca_cap; @@ -597,7 +615,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); if (MLX5_CAP_GEN(dev, roce_rw_supported)) - MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); max_uc_list = max_uc_list_get_devlink_param(dev); if (max_uc_list > 0) @@ -623,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) */ static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) { - return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..d6338fb449c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1279,16 +1279,17 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) +bool mlx5_is_roce_on(struct mlx5_core_dev *dev); + +static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); - union devlink_param_value val; - int err; - - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return err ? MLX5_CAP_GEN(dev, roce) : val.vbool; + if (MLX5_CAP_GEN(dev, roce_rw_supported)) + return MLX5_CAP_GEN(dev, roce); + + /* If RoCE cap is read-only in FW, get RoCE state from devlink + * in order to support RoCE enable/disable feature + */ + return mlx5_is_roce_on(dev); } #endif /* MLX5_DRIVER_H */ -- cgit