diff options
-rw-r--r-- | drivers/misc/habanalabs/common/firmware_if.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi2/gaudi2.c | 26 |
3 files changed, 27 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 537b1ae3fcb7..cda0bf3dbf1b 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1352,7 +1352,7 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) } } -static int hl_fw_wait_preboot_ready(struct hl_device *hdev) +int hl_fw_wait_preboot_ready(struct hl_device *hdev) { struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; u32 status; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index de715c91a87e..e5443bf7fe12 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3745,6 +3745,7 @@ int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev); void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev); int hl_fw_init_cpu(struct hl_device *hdev); +int hl_fw_wait_preboot_ready(struct hl_device *hdev); int hl_fw_read_preboot_status(struct hl_device *hdev); int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, struct fw_load_mgr *fw_loader, diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 85041f33e42a..987ec44fa378 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -5484,7 +5484,31 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_rese skip_reset: if (driver_performs_reset || hard_reset) - gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); + /* + * Instead of waiting for BTM indication we should wait for preboot ready: + * Consider the below scenario: + * 1. FW update is being triggered + * - setting the dirty bit + * 2. hard reset will be triggered due to the dirty bit + * 3. 
FW initiates the reset: + * - dirty bit cleared + * - BTM indication cleared + * - preboot ready indication cleared + * 4. during hard reset: + * - BTM indication will be set + * - BIST test performed and another reset triggered + * 5. only after this reset the preboot will set the preboot ready + * + * when polling on BTM indication alone we can lose sync with FW while trying to + * communicate with FW that is in the middle of a reset. + * to overcome this we will always wait for the preboot ready indication + */ + if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) { + msleep(reset_sleep_ms); + hl_fw_wait_preboot_ready(hdev); + } else { + gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us); + } else gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); |