From 1865ea9adbfaf341c5cd5d8f7d384f19948b2fe9 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Wed, 30 May 2018 10:59:49 -0700 Subject: net/mlx5: Add temperature warning event to log Temperature warning event is sent by FW to indicate high temperature as detected by one of the sensors on the board. Add handling of this event by writing the numbers of the alert sensors to the kernel log. Signed-off-by: Ilan Tayari Signed-off-by: Adi Nissim Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1814f803bd2c..1a3a2b9a7232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -144,6 +144,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_GPIO_EVENT"; case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; case MLX5_EVENT_TYPE_REMOTE_CONFIG: return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; case MLX5_EVENT_TYPE_DB_BF_CONGESTION: @@ -396,6 +398,20 @@ static void general_event_handler(struct mlx5_core_dev *dev, } } +static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, + struct mlx5_eqe *eqe) +{ + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); +} + /* caller must eventually call mlx5_cq_put on the returned cq */ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { @@ -550,6 +566,10 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); break; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + mlx5_temp_warning_event(dev, eqe); + break; + case MLX5_EVENT_TYPE_GENERAL_EVENT: general_event_handler(dev, eqe); break; @@ -827,6 +847,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); + if (MLX5_CAP_GEN(dev, temp_warn_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); -- cgit From 1f0cf89b09305743fca5898660a7be83aab38a74 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Wed, 30 May 2018 10:59:50 -0700 Subject: net/mlx5: Add FPGA QP error event The FPGA queue pair (QP) event fires whenever a QP on the FPGA transitions to the error state. At this stage, this event is unrecoverable, it may become recoverable in the future. Signed-off-by: Ilan Tayari Signed-off-by: Adi Nissim Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 7 +++++-- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + include/linux/mlx5/mlx5_ifc_fpga.h | 16 ++++++++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1a3a2b9a7232..406c23862f5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -164,6 +164,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; case MLX5_EVENT_TYPE_FPGA_ERROR: return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; case MLX5_EVENT_TYPE_GENERAL_EVENT: return "MLX5_EVENT_TYPE_GENERAL_EVENT"; default: @@ -563,6 +565,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_FPGA_ERROR: + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); break; @@ -842,11 +845,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); if (MLX5_CAP_GEN(dev, fpga)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR); + async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | + (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); if (MLX5_CAP_GEN_MAX(dev, dct)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); - if (MLX5_CAP_GEN(dev, temp_warn_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c1095e08f0c8..0f006cf8343d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -331,6 +331,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, + MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ca6c0dfb5ffe..8e0b8865f91e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -60,6 +60,7 @@ enum { MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa, MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb, MLX5_EVENT_TYPE_CODING_FPGA_ERROR = 0x20, + MLX5_EVENT_TYPE_CODING_FPGA_QP_ERROR = 0x21 }; enum { diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 193091537cb6..64d0f40d4cc3 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -470,6 +470,22 @@ struct mlx5_ifc_ipsec_counters_bits { u8 dropped_cmd[0x40]; }; +enum { + MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1, + MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2, +}; + +struct mlx5_ifc_fpga_qp_error_event_bits { + u8 reserved_at_0[0x40]; + + u8 reserved_at_40[0x18]; + u8 syndrome[0x8]; + + u8 reserved_at_60[0x60]; + + u8 reserved_at_c0[0x8]; + u8 fpga_qpn[0x18]; +}; enum mlx5_ifc_fpga_ipsec_response_syndrome { MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, -- cgit