Diffstat (limited to 'drivers/accel')
39 files changed, 1475 insertions, 600 deletions
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 3348ad12c237..601fdbe70179 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -14,7 +14,7 @@ #include <linux/vmalloc.h> #include <linux/pci-p2pdma.h> -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); #define HL_MMU_DEBUG 0 diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 682c53245286..9e055b5ce03d 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU select FW_LOADER select DRM_GEM_SHMEM_HELPER select GENERIC_ALLOCATOR + select WANT_DEV_COREDUMP help Choose this option if you have a system with an 14th generation Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU) @@ -15,3 +16,12 @@ config DRM_ACCEL_IVPU and Deep Learning applications. If "M" is selected, the module will be called intel_vpu. + +config DRM_ACCEL_IVPU_DEBUG + bool "Intel NPU debug mode" + depends on DRM_ACCEL_IVPU + help + Choose this option to enable additional + debug features for the Intel NPU driver: + - Always print debug messages regardless of dyndbg config, + - Enable unsafe module params. diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index ebd682a42eb1..1029e0bab061 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -16,8 +16,14 @@ intel_vpu-y := \ ivpu_mmu_context.o \ ivpu_ms.o \ ivpu_pm.o \ - ivpu_sysfs.o + ivpu_sysfs.o \ + ivpu_trace_points.o intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o +intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o + +subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG + +CFLAGS_ivpu_trace_points.o = -I$(src) diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c new file mode 100644 index 000000000000..16ad0c30818c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include <linux/devcoredump.h> +#include <linux/firmware.h> + +#include "ivpu_coredump.h" +#include "ivpu_fw.h" +#include "ivpu_gem.h" +#include "vpu_boot_api.h" + +#define CRASH_DUMP_HEADER "Intel NPU crash dump" +#define CRASH_DUMP_HEADERS_SIZE SZ_4K + +void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_print_iterator pi = {}; + struct drm_printer p; + size_t coredump_size; + char *coredump; + + coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE + + ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb); + coredump = vmalloc(coredump_size); + if (!coredump) + return; + + pi.data = coredump; + pi.remain = coredump_size; + p = drm_coredump_printer(&pi); + + drm_printf(&p, "%s\n", CRASH_DUMP_HEADER); + drm_printf(&p, "FW version: %s\n", vdev->fw->version); + ivpu_fw_log_print(vdev, false, &p); + + dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL); +} diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h new file mode 100644 index 000000000000..8efb09d02441 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_coredump.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __IVPU_COREDUMP_H__ +#define __IVPU_COREDUMP_H__ + +#include <drm/drm_print.h> + +#include "ivpu_drv.h" +#include "ivpu_fw_log.h" + +#ifdef CONFIG_DEV_COREDUMP +void ivpu_dev_coredump(struct 
ivpu_device *vdev); +#else +static inline void ivpu_dev_coredump(struct ivpu_device *vdev) +{ + struct drm_printer p = drm_info_printer(vdev->drm.dev); + + ivpu_fw_log_print(vdev, false, &p); +} +#endif + +#endif /* __IVPU_COREDUMP_H__ */ diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 8d50981594d1..8180b95ed69d 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -45,6 +45,14 @@ static int fw_name_show(struct seq_file *s, void *v) return 0; } +static int fw_version_show(struct seq_file *s, void *v) +{ + struct ivpu_device *vdev = seq_to_ivpu(s); + + seq_printf(s, "%s\n", vdev->fw->version); + return 0; +} + static int fw_trace_capability_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); @@ -119,6 +127,7 @@ static int firewall_irq_counter_show(struct seq_file *s, void *v) static const struct drm_debugfs_info vdev_debugfs_list[] = { {"bo_list", bo_list_show, 0}, {"fw_name", fw_name_show, 0}, + {"fw_version", fw_version_show, 0}, {"fw_trace_capability", fw_trace_capability_show, 0}, {"fw_trace_config", fw_trace_config_show, 0}, {"last_bootmode", last_bootmode_show, 0}, @@ -127,32 +136,23 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = { {"firewall_irq_counter", firewall_irq_counter_show, 0}, }; -static ssize_t -dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +static int dvfs_mode_get(void *data, u64 *dvfs_mode) { - struct ivpu_device *vdev = file->private_data; - struct ivpu_fw_info *fw = vdev->fw; - u32 dvfs_mode; - int ret; - - ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode); - if (ret < 0) - return ret; + struct ivpu_device *vdev = (struct ivpu_device *)data; - fw->dvfs_mode = dvfs_mode; + *dvfs_mode = vdev->fw->dvfs_mode; + return 0; +} - ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret) - return ret; +static int dvfs_mode_set(void *data, u64 dvfs_mode) +{ + struct ivpu_device *vdev = (struct ivpu_device *)data; - return size; + vdev->fw->dvfs_mode = (u32)dvfs_mode; + return pci_try_reset_function(to_pci_dev(vdev->drm.dev)); } -static const struct file_operations dvfs_mode_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = dvfs_mode_fops_write, -}; +DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n"); static ssize_t fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) @@ -201,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l if (!size) return -EINVAL; - ivpu_fw_log_clear(vdev); + ivpu_fw_log_mark_read(vdev); return size; } @@ -346,49 +346,23 @@ static const struct file_operations ivpu_force_recovery_fops = { .write = ivpu_force_recovery_fn, }; -static ssize_t -ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +static int ivpu_reset_engine_fn(void *data, u64 val) { - struct ivpu_device *vdev = file->private_data; - - if (!size) - return -EINVAL; + struct ivpu_device *vdev = (struct ivpu_device *)data; - if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE)) - return -ENODEV; - if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY)) - return -ENODEV; - - return size; + return ivpu_jsm_reset_engine(vdev, (u32)val); } -static const struct file_operations ivpu_reset_engine_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = ivpu_reset_engine_fn, -}; +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, 
ivpu_reset_engine_fn, "0x%02llx\n"); -static ssize_t -ivpu_resume_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +static int ivpu_resume_engine_fn(void *data, u64 val) { - struct ivpu_device *vdev = file->private_data; - - if (!size) - return -EINVAL; + struct ivpu_device *vdev = (struct ivpu_device *)data; - if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COMPUTE)) - return -ENODEV; - if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COPY)) - return -ENODEV; - - return size; + return ivpu_jsm_hws_resume_engine(vdev, (u32)val); } -static const struct file_operations ivpu_resume_engine_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = ivpu_resume_engine_fn, -}; +DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n"); static int dct_active_get(void *data, u64 *active_percent) { @@ -432,7 +406,7 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, &ivpu_force_recovery_fops); - debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev, + debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev, &dvfs_mode_fops); debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev, diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index c91400ecf926..ca2bf47ce248 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/pm_runtime.h> +#include <generated/utsrelease.h> #include <drm/drm_accel.h> #include <drm/drm_file.h> @@ -14,7 +15,7 @@ #include <drm/drm_ioctl.h> #include <drm/drm_prime.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" @@ -29,10 +30,10 @@ #include "ivpu_ms.h" #include "ivpu_pm.h" #include "ivpu_sysfs.h" +#include "vpu_boot_api.h" #ifndef DRIVER_VERSION_STR -#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ - __stringify(DRM_IVPU_DRIVER_MINOR) "." +#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE #endif static struct lock_class_key submitted_jobs_xa_lock_class_key; @@ -42,8 +43,10 @@ module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); int ivpu_test_mode; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); MODULE_PARM_DESC(test_mode, "Test mode mask. 
See IVPU_TEST_MODE_* macros."); +#endif u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); @@ -53,9 +56,9 @@ u8 ivpu_pll_max_ratio = U8_MAX; module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency"); -int ivpu_sched_mode; +int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO; module_param_named(sched_mode, ivpu_sched_mode, int, 0444); -MODULE_PARM_DESC(sched_mode, "Scheduler mode: 0 - Default scheduler, 1 - Force HW scheduler"); +MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler"); bool ivpu_disable_mmu_cont_pages; module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444); @@ -85,7 +88,7 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi ivpu_cmdq_release_all_locked(file_priv); ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx); - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + ivpu_mmu_context_fini(vdev, &file_priv->ctx); file_priv->bound = false; drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id)); } @@ -103,6 +106,8 @@ static void file_priv_release(struct kref *ref) pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&vdev->context_list_lock); file_priv_unbind(vdev, file_priv); + drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa)); + xa_destroy(&file_priv->cmdq_xa); mutex_unlock(&vdev->context_list_lock); pm_runtime_put_autosuspend(vdev->drm.dev); @@ -116,8 +121,6 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link) struct ivpu_file_priv *file_priv = *link; struct ivpu_device *vdev = file_priv->vdev; - drm_WARN_ON(&vdev->drm, !file_priv); - ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n", file_priv->ctx.id, kref_read(&file_priv->ref)); @@ -255,9 +258,14 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) goto err_unlock; } - ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); - if (ret) - goto err_xa_erase; + ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id); + + file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK; + + xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1); + file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID; + file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID; mutex_unlock(&vdev->context_list_lock); drm_dev_exit(idx); @@ -269,8 +277,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) return 0; -err_xa_erase: - xa_erase_irq(&vdev->context_xa, ctx_id); err_unlock: mutex_unlock(&vdev->context_list_lock); mutex_destroy(&file_priv->ms_lock); @@ -346,7 +352,7 @@ static int ivpu_hw_sched_init(struct ivpu_device *vdev) { int ret = 0; - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_setup_priority_bands(vdev); if (ret) { ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret); @@ -380,10 +386,7 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); - ivpu_hw_diagnose_failure(vdev); - ivpu_mmu_evtq_dump(vdev); - ivpu_fw_log_dump(vdev); - return ret; + goto err_diagnose_failure; } ivpu_hw_irq_clear(vdev); @@ -394,12 +397,20 @@ int ivpu_boot(struct ivpu_device *vdev) if (ivpu_fw_is_cold_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) - return ret; + goto 
err_diagnose_failure; - return ivpu_hw_sched_init(vdev); + ret = ivpu_hw_sched_init(vdev); + if (ret) + goto err_diagnose_failure; } return 0; + +err_diagnose_failure: + ivpu_hw_diagnose_failure(vdev); + ivpu_mmu_evtq_dump(vdev); + ivpu_dev_coredump(vdev); + return ret; } void ivpu_prepare_for_reset(struct ivpu_device *vdev) @@ -446,9 +457,16 @@ static const struct drm_driver driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, + +#ifdef DRIVER_DATE .date = DRIVER_DATE, - .major = DRM_IVPU_DRIVER_MAJOR, - .minor = DRM_IVPU_DRIVER_MINOR, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +#else + .date = UTS_RELEASE, + .major = 1, +#endif }; static void ivpu_context_abort_invalid(struct ivpu_device *vdev) @@ -606,6 +624,9 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); + vdev->db_limit.min = IVPU_MIN_DB; + vdev->db_limit.max = IVPU_MAX_DB; + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) goto err_xa_destroy; @@ -632,9 +653,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) if (ret) goto err_shutdown; - ret = ivpu_mmu_global_context_init(vdev); - if (ret) - goto err_shutdown; + ivpu_mmu_global_context_init(vdev); ret = ivpu_mmu_init(vdev); if (ret) @@ -722,6 +741,7 @@ static struct pci_device_id ivpu_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) }, { } }; MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 63f13b697eed..3fdff3f6cffd 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -21,11 +21,11 @@ #define DRIVER_NAME "intel_vpu" #define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)" -#define DRIVER_DATE "20230117" -#define PCI_DEVICE_ID_MTL 0x7d1d -#define PCI_DEVICE_ID_ARL 0xad1d -#define PCI_DEVICE_ID_LNL 0x643e +#define PCI_DEVICE_ID_MTL 0x7d1d +#define PCI_DEVICE_ID_ARL 0xad1d +#define PCI_DEVICE_ID_LNL 0x643e +#define PCI_DEVICE_ID_PTL_P 0xb03e #define IVPU_HW_IP_37XX 37 #define IVPU_HW_IP_40XX 40 @@ -46,17 +46,22 @@ #define IVPU_MIN_DB 1 #define IVPU_MAX_DB 255 -#define IVPU_NUM_ENGINES 2 +#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0) +#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8) + #define IVPU_NUM_PRIORITIES 4 -#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_ENGINES * IVPU_NUM_PRIORITIES) +#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES) -#define IVPU_CMDQ_INDEX(engine, priority) ((engine) * IVPU_NUM_PRIORITIES + (priority)) +#define IVPU_CMDQ_MIN_ID 1 +#define IVPU_CMDQ_MAX_ID 255 #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 #define IVPU_PLATFORM_FPGA 3 #define IVPU_PLATFORM_INVALID 8 +#define IVPU_SCHED_MODE_AUTO -1 + #define IVPU_DBG_REG BIT(0) #define IVPU_DBG_IRQ BIT(1) #define IVPU_DBG_MMU BIT(2) @@ -134,6 +139,8 @@ struct ivpu_device { struct xa_limit context_xa_limit; struct xarray db_xa; + struct xa_limit db_limit; + u32 db_next; struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; @@ -152,6 +159,7 @@ struct ivpu_device { int tdr; int autosuspend; int d0i3_entry_msg; + int state_dump_msg; } timeout; }; @@ -163,11 +171,15 @@ struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; struct mutex lock; /* Protects cmdq */ - struct ivpu_cmdq 
*cmdq[IVPU_NUM_CMDQS_PER_CTX]; + struct xarray cmdq_xa; struct ivpu_mmu_context ctx; struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ struct list_head ms_instance_list; struct ivpu_bo *ms_info_bo; + struct xa_limit job_limit; + u32 job_id_next; + struct xa_limit cmdq_limit; + u32 cmdq_id_next; bool has_mmu_faults; bool bound; bool aborted; @@ -185,9 +197,9 @@ extern bool ivpu_force_snoop; #define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2) #define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4) #define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) -#define IVPU_TEST_MODE_PREEMPTION_DISABLE BIT(6) -#define IVPU_TEST_MODE_HWS_EXTRA_EVENTS BIT(7) +#define IVPU_TEST_MODE_MIP_DISABLE BIT(6) #define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8) +#define IVPU_TEST_MODE_TURBO BIT(9) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); @@ -215,6 +227,8 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev) return IVPU_HW_IP_37XX; case PCI_DEVICE_ID_LNL: return IVPU_HW_IP_40XX; + case PCI_DEVICE_ID_PTL_P: + return IVPU_HW_IP_50XX; default: dump_stack(); ivpu_err(vdev, "Unknown NPU IP generation\n"); @@ -229,6 +243,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev) case PCI_DEVICE_ID_ARL: return IVPU_HW_BTRS_MTL; case PCI_DEVICE_ID_LNL: + case PCI_DEVICE_ID_PTL_P: return IVPU_HW_BTRS_LNL; default: dump_stack(); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index ede6165e09d9..6037ec0b3096 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -25,7 +25,6 @@ #define FW_SHAVE_NN_MAX_SIZE SZ_2M #define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START) #define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE) -#define FW_VERSION_HEADER_SIZE SZ_4K #define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE) #define WATCHDOG_MSS_REDIRECT 32 @@ -47,8 +46,10 @@ #define IVPU_FOCUS_PRESENT_TIMER_MS 1000 static char *ivpu_firmware; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/.."); +#endif static struct { int gen; @@ -58,11 +59,14 @@ static struct { { IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" }, { IVPU_HW_IP_40XX, "vpu_40xx.bin" }, { IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" }, + { IVPU_HW_IP_50XX, "vpu_50xx.bin" }, + { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" }, }; /* Production fw_names from the table above */ MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin"); MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin"); +MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin"); static int ivpu_fw_request(struct ivpu_device *vdev) { @@ -135,6 +139,15 @@ static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range return true; } +static u32 +ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr) +{ + if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO) + return ivpu_sched_mode; + + return VPU_SCHEDULING_MODE_OS; +} + static int ivpu_fw_parse(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; @@ -191,8 +204,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n", fw_hdr->header_version, fw_hdr->image_format); - ivpu_info(vdev, "Firmware: %s, version: %s", fw->name, - (const char *)fw_hdr + VPU_FW_HEADER_SIZE); + if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE)) + ivpu_warn(vdev, "Missing firmware 
version\n"); + + ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version); if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3)) return -EINVAL; @@ -208,14 +223,16 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->cold_boot_entry_point = fw_hdr->entry_point; fw->entry_point = fw->cold_boot_entry_point; - fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL); + fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL); fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; fw->trace_hw_component_mask = -1; fw->dvfs_mode = 0; + fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, @@ -311,7 +328,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) goto err_free_fw_mem; } - if (ivpu_log_level <= IVPU_FW_LOG_INFO) + if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO) log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE; else log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE; @@ -567,8 +584,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2; boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2; - boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; - boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); + if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { + boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; + boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); + } /* Allow configuration for L2C_PAGE_TABLE with boot param value */ boot_params->autoconfig = 1; @@ -604,8 +623,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev); boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev); boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev); - boot_params->vpu_scheduling_mode = vdev->hw->sched_mode; - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) + boot_params->vpu_scheduling_mode = vdev->fw->sched_mode; + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS; boot_params->dvfs_mode = vdev->fw->dvfs_mode; if (!IVPU_WA(disable_d0i3_msg)) diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 40d9d17be3f5..1d0b2bd9d65c 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -1,11 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_FW_H__ #define __IVPU_FW_H__ +#include "vpu_jsm_api.h" + +#define FW_VERSION_HEADER_SIZE SZ_4K +#define FW_VERSION_STR_SIZE SZ_256 + struct ivpu_device; struct ivpu_bo; struct vpu_boot_params; @@ -13,6 +18,7 @@ struct vpu_boot_params; struct ivpu_fw_info { const struct firmware *file; const char *name; + char version[FW_VERSION_STR_SIZE]; struct ivpu_bo *mem; struct ivpu_bo *mem_shave_nn; struct ivpu_bo *mem_log_crit; @@ -32,6 +38,7 @@ struct ivpu_fw_info { u32 secondary_preempt_buf_size; u64 read_only_addr; u32 
read_only_size; + u32 sched_mode; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c index ef0adb5e0fbe..337c906b0210 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.c +++ b/drivers/accel/ivpu/ivpu_fw_log.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/ctype.h> @@ -15,19 +15,19 @@ #include "ivpu_fw_log.h" #include "ivpu_gem.h" -#define IVPU_FW_LOG_LINE_LENGTH 256 +#define IVPU_FW_LOG_LINE_LENGTH 256 -unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR; -module_param(ivpu_log_level, uint, 0444); -MODULE_PARM_DESC(ivpu_log_level, - "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG) +unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR; +module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444); +MODULE_PARM_DESC(fw_log_level, + "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG) " info=" __stringify(IVPU_FW_LOG_INFO) " warn=" __stringify(IVPU_FW_LOG_WARN) " error=" __stringify(IVPU_FW_LOG_ERROR) " fatal=" __stringify(IVPU_FW_LOG_FATAL)); -static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, - struct vpu_tracing_buffer_header **log_header) +static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, + struct vpu_tracing_buffer_header **out_log) { struct vpu_tracing_buffer_header *log; @@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, return -EINVAL; } - *log_header = log; + *out_log = log; *offset += log->size; ivpu_dbg(vdev, FW_BOOT, @@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, return 0; } -static void buffer_print(char *buffer, u32 size, struct drm_printer *p) +static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p) { char line[IVPU_FW_LOG_LINE_LENGTH]; u32 index = 0; @@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p) } line[index] = 0; if (index != 0) - drm_printf(p, "%s\n", line); + drm_printf(p, "%s", line); } -static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log, - const char *prefix, bool only_new_msgs, struct drm_printer *p) +static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix, + bool only_new_msgs, struct drm_printer *p) { - char *log_buffer = (void *)log + log->header_size; - u32 log_size = log->size - log->header_size; - u32 log_start = log->read_index; - u32 log_end = log->write_index; - - if (!(log->write_index || log->wrap_count) || - (log->write_index == log->read_index && only_new_msgs)) { - drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); - return; + char *log_data = (void *)log + log->header_size; + u32 data_size = log->size - log->header_size; + u32 log_start = only_new_msgs ? 
READ_ONCE(log->read_index) : 0; + u32 log_end = READ_ONCE(log->write_index); + + if (log->wrap_count == log->read_wrap_count) { + if (log_end <= log_start) { + drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name); + return; + } + } else if (log->wrap_count == log->read_wrap_count + 1) { + if (log_end > log_start) + log_start = log_end; + } else { + log_start = log_end; } drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name); - if (log->write_index > log->read_index) { - buffer_print(log_buffer + log_start, log_end - log_start, p); + if (log_end > log_start) { + fw_log_print_lines(log_data + log_start, log_end - log_start, p); } else { - buffer_print(log_buffer + log_end, log_size - log_end, p); - buffer_print(log_buffer, log_end, p); + fw_log_print_lines(log_data + log_start, data_size - log_start, p); + fw_log_print_lines(log_data, log_end, p); } - drm_printf(p, "\x1b[0m"); + drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */ drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name); } -void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p) +static void +fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name, + struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p) { - struct vpu_tracing_buffer_header *log_header; + struct vpu_tracing_buffer_header *log; u32 next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) - fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p); + while (fw_log_from_bo(vdev, bo, &next, &log) == 0) + fw_log_print_buffer(log, name, only_new_msgs, p); +} + +void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p) +{ + fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p); + fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p); +} + +void ivpu_fw_log_mark_read(struct ivpu_device *vdev) +{ + struct vpu_tracing_buffer_header *log; + u32 next; + + next = 0; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { + log->read_index = READ_ONCE(log->write_index); + log->read_wrap_count = READ_ONCE(log->wrap_count); + } next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) - fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p); + while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { + log->read_index = READ_ONCE(log->write_index); + log->read_wrap_count = READ_ONCE(log->wrap_count); + } } -void ivpu_fw_log_clear(struct ivpu_device *vdev) +void ivpu_fw_log_reset(struct ivpu_device *vdev) { - struct vpu_tracing_buffer_header *log_header; - u32 next = 0; + struct vpu_tracing_buffer_header *log; + u32 next; - while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0) - log_header->read_index = log_header->write_index; + next = 0; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) { + log->read_index = 0; + log->read_wrap_count = 0; + } next = 0; - while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0) - log_header->read_index = log_header->write_index; + while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) { + log->read_index = 0; + log->read_wrap_count = 0; + } } diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h index 0b2573f6f315..8bb528a73cb7 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.h +++ b/drivers/accel/ivpu/ivpu_fw_log.h @@ -1,6 
+1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __IVPU_FW_LOG_H__ @@ -8,8 +8,6 @@ #include <linux/types.h> -#include <drm/drm_print.h> - #include "ivpu_drv.h" #define IVPU_FW_LOG_DEFAULT 0 @@ -19,20 +17,15 @@ #define IVPU_FW_LOG_ERROR 4 #define IVPU_FW_LOG_FATAL 5 -extern unsigned int ivpu_log_level; - #define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M #define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M #define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K -void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); -void ivpu_fw_log_clear(struct ivpu_device *vdev); +extern unsigned int ivpu_fw_log_level; -static inline void ivpu_fw_log_dump(struct ivpu_device *vdev) -{ - struct drm_printer p = drm_info_printer(vdev->drm.dev); +void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p); +void ivpu_fw_log_mark_read(struct ivpu_device *vdev); +void ivpu_fw_log_reset(struct ivpu_device *vdev); - ivpu_fw_log_print(vdev, false, &p); -} #endif /* __IVPU_FW_LOG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 1b409dbd332d..16178054e629 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -384,6 +384,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */ + timeout += 1; + obj = drm_gem_object_lookup(file, args->handle); if (!obj) return -EINVAL; @@ -406,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) mutex_lock(&bo->lock); drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", - bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size, + bo, bo->ctx ? 
bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size, bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index e69c0613513f..4e1054f3466e 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev) vdev->timeout.tdr = 2000000; vdev->timeout.autosuspend = -1; vdev->timeout.d0i3_entry_msg = 500; + vdev->timeout.state_dump_msg = 10; } else if (ivpu_is_simics(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 500; vdev->timeout.tdr = 10000; - vdev->timeout.autosuspend = -1; + vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 100; + vdev->timeout.state_dump_msg = 10; } else { vdev->timeout.boot = 1000; vdev->timeout.jsm = 500; @@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; + vdev->timeout.state_dump_msg = 10; } } @@ -111,14 +114,14 @@ static void memory_ranges_init(struct ivpu_device *vdev) { if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_range_init(&vdev->hw->ranges.user, 0xc0000000, 255 * SZ_1M); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x88000000, 511 * SZ_1M); ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x180000000, SZ_2G); - ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_8G); + ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_128G); } else { ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M); - ivpu_hw_range_init(&vdev->hw->ranges.user, 0x80000000, SZ_256M); - ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M); - ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_8G); + ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000, SZ_2G); + ivpu_hw_range_init(&vdev->hw->ranges.user, 0x100000000, SZ_256G); + vdev->hw->ranges.dma = vdev->hw->ranges.user; } } diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index a96a05b2acda..fc4dbfc980c8 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -46,7 +46,6 @@ struct ivpu_hw_info { u32 profiling_freq; } pll; u32 tile_fuse; - u32 sched_mode; u32 sku; u16 config; int dma_bits; diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h index d0b795b344c7..fc0ee8d637f9 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h @@ -115,6 +115,8 @@ #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8) +#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16) #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu #define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 745e5248803d..3212c99f3682 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -141,16 +141,10 @@ static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config } config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse); - if (!tile_disable_check(config)) { - ivpu_err(vdev, "Fuse: Invalid tile disable config (0x%x)\n", config); - return -EIO; - } + if 
(!tile_disable_check(config)) + ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config); - if (config) - ivpu_dbg(vdev, MISC, "Fuse: %d tiles enabled. Tile number %d disabled\n", - BTRS_LNL_TILE_MAX_NUM - 1, ffs(config) - 1); - else - ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", BTRS_LNL_TILE_MAX_NUM); + ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config); *tile_fuse_config = config; return 0; @@ -163,7 +157,6 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; - hw->sched_mode = ivpu_sched_mode; return 0; } @@ -178,7 +171,6 @@ static int info_init_lnl(struct ivpu_device *vdev) if (ret) return ret; - hw->sched_mode = ivpu_sched_mode; hw->tile_fuse = tile_fuse_config; hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; @@ -315,10 +307,6 @@ static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp, wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0; wp->epp = enable ? PLL_EPP_DEFAULT : 0; } - - /* Simics cannot start without at least one tile */ - if (enable && ivpu_is_simics(vdev)) - wp->cfg = 1; } static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable) @@ -465,9 +453,6 @@ int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev) if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) return 0; - if (ivpu_is_simics(vdev)) - return 0; - return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US); } diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c index 60b33fc59d96..029dd065614b 100644 --- a/drivers/accel/ivpu/ivpu_hw_ip.c +++ b/drivers/accel/ivpu/ivpu_hw_ip.c @@ -8,15 +8,12 @@ #include "ivpu_hw.h" #include "ivpu_hw_37xx_reg.h" #include "ivpu_hw_40xx_reg.h" +#include "ivpu_hw_btrs.h" #include "ivpu_hw_ip.h" #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_pm.h" -#define PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT 0 -#define PWR_ISLAND_EN_POST_DLY_FREQ_HIGH 18 -#define PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT 3 -#define PWR_ISLAND_STATUS_DLY_FREQ_HIGH 46 #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) #define TIM_SAFE_ENABLE 0xf1d0dead @@ -268,20 +265,15 @@ void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev) idle_gen_drive_40xx(vdev, false); } -static void pwr_island_delay_set_50xx(struct ivpu_device *vdev) +static void +pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status) { - u32 val, post, status; - - if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) { - post = PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT; - status = PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT; - } else { - post = PWR_ISLAND_EN_POST_DLY_FREQ_HIGH; - status = PWR_ISLAND_STATUS_DLY_FREQ_HIGH; - } + u32 val; val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY); val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val); + val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val); REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val); val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY); @@ -311,9 +303,6 @@ static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable) val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val); REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val); - - if 
(enable) - ndelay(500); } static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable) @@ -326,9 +315,6 @@ static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable) val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val); REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val); - - if (!enable) - ndelay(500); } static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable) @@ -347,9 +333,11 @@ static void pwr_island_enable(struct ivpu_device *vdev) { if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) { pwr_island_trickle_drive_37xx(vdev, true); + ndelay(500); pwr_island_drive_37xx(vdev, true); } else { pwr_island_trickle_drive_40xx(vdev, true); + ndelay(500); pwr_island_drive_40xx(vdev, true); } } @@ -686,13 +674,36 @@ static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable) REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val); } +static void pwr_island_delay_set(struct ivpu_device *vdev) +{ + bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH; + u32 post, post1, post2, status; + + if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX) + return; + + switch (ivpu_device_id(vdev)) { + case PCI_DEVICE_ID_PTL_P: + post = high ? 18 : 0; + post1 = 0; + post2 = 0; + status = high ? 46 : 3; + break; + + default: + dump_stack(); + ivpu_err(vdev, "Unknown device ID\n"); + return; + } + + pwr_island_delay_set_50xx(vdev, post, post1, post2, status); +} + int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev) { int ret; - if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_50XX) - pwr_island_delay_set_50xx(vdev); - + pwr_island_delay_set(vdev); pwr_island_enable(vdev); ret = wait_for_pwr_island_status(vdev, 0x1); diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 78b32a823241..01ebf88fe6ef 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -15,6 +15,7 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" #define IPC_MAX_RX_MSG 128 @@ -227,6 +228,7 @@ int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, stru goto unlock; ivpu_ipc_tx(vdev, cons->tx_vpu_addr); + trace_jsm("[tx]", req); unlock: mutex_unlock(&ipc->lock); @@ -278,12 +280,13 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { - ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); + ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } if (jsm_msg) memcpy(jsm_msg, rx_msg->jsm_msg, size); + trace_jsm("[rx]", rx_msg->jsm_msg); } ivpu_ipc_rx_msg_del(vdev, rx_msg); @@ -291,15 +294,16 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, return ret; } -static int +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms) { struct ivpu_ipc_consumer cons; int ret; + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ret = ivpu_ipc_send(vdev, &cons, req); @@ -325,19 +329,21 @@ consumer_del: return ret; } -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, 
unsigned long timeout_ms) +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; int ret, hb_ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) - return ret; + goto rpm_put; hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, @@ -345,21 +351,33 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r if (hb_ret == -ETIMEDOUT) ivpu_pm_trigger_recovery(vdev, "IPC timeout"); +rpm_put: + ivpu_rpm_put(vdev); return ret; } -int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms) +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms) { + struct ivpu_ipc_consumer cons; int ret; ret = ivpu_rpm_get(vdev); if (ret < 0) return ret; - ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); + ret = ivpu_ipc_send(vdev, &cons, req); + if (ret) { + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); + goto consumer_del; + } + + msleep(timeout_ms); + +consumer_del: + ivpu_ipc_consumer_del(vdev, &cons); ivpu_rpm_put(vdev); return ret; } @@ -518,7 +536,6 @@ void ivpu_ipc_fini(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - drm_WARN_ON(&vdev->drm, ipc->on); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 4fe38141045e..b4dfb504679b 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -101,12 +101,13 @@ int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms); - -int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, - u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp_type, + struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms); +int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index be2e2bf0f43f..7149312f16e1 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -18,11 +18,10 @@ #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" #include "vpu_boot_api.h" 
#define CMD_BUF_IDX 0 -#define JOB_ID_JOB_MASK GENMASK(7, 0) -#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) @@ -35,24 +34,20 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev, { u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE); u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE); - struct ivpu_addr_range range; - if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW) + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW || + ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE) return 0; - range.start = vdev->hw->ranges.user.end - (primary_size * IVPU_NUM_CMDQS_PER_CTX); - range.end = vdev->hw->ranges.user.end; - cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, primary_size, - DRM_IVPU_BO_WC); + cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user, + primary_size, DRM_IVPU_BO_WC); if (!cmdq->primary_preempt_buf) { ivpu_err(vdev, "Failed to create primary preemption buffer\n"); return -ENOMEM; } - range.start = vdev->hw->ranges.shave.end - (secondary_size * IVPU_NUM_CMDQS_PER_CTX); - range.end = vdev->hw->ranges.shave.end; - cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, secondary_size, - DRM_IVPU_BO_WC); + cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma, + secondary_size, DRM_IVPU_BO_WC); if (!cmdq->secondary_preempt_buf) { ivpu_err(vdev, "Failed to create secondary preemption buffer\n"); goto err_free_primary; @@ -62,24 +57,24 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev, err_free_primary: ivpu_bo_free(cmdq->primary_preempt_buf); + cmdq->primary_preempt_buf = NULL; return -ENOMEM; } static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) { - if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW) + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) return; - drm_WARN_ON(&vdev->drm, !cmdq->primary_preempt_buf); - drm_WARN_ON(&vdev->drm, !cmdq->secondary_preempt_buf); - ivpu_bo_free(cmdq->primary_preempt_buf); - ivpu_bo_free(cmdq->secondary_preempt_buf); + if (cmdq->primary_preempt_buf) + ivpu_bo_free(cmdq->primary_preempt_buf); + if (cmdq->secondary_preempt_buf) + ivpu_bo_free(cmdq->secondary_preempt_buf); } static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { - struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB}; struct ivpu_device *vdev = file_priv->vdev; struct ivpu_cmdq *cmdq; int ret; @@ -88,25 +83,33 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) if (!cmdq) return NULL; - ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL); - if (ret) { + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); goto err_free_cmdq; } + ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit, + &file_priv->cmdq_id_next, GFP_KERNEL); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret); + goto err_erase_db_xa; + } + cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE); if (!cmdq->mem) - goto err_erase_xa; + goto err_erase_cmdq_xa; ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq); if (ret) - goto 
err_free_cmdq_mem; + ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n"); return cmdq; -err_free_cmdq_mem: - ivpu_bo_free(cmdq->mem); -err_erase_xa: +err_erase_cmdq_xa: + xa_erase(&file_priv->cmdq_xa, cmdq->id); +err_erase_db_xa: xa_erase(&vdev->db_xa, cmdq->db_id); err_free_cmdq: kfree(cmdq); @@ -130,13 +133,13 @@ static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq struct ivpu_device *vdev = file_priv->vdev; int ret; - ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->db_id, + ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id, task_pid_nr(current), engine, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (ret) return ret; - ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->db_id, + ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id, priority); if (ret) return ret; @@ -149,21 +152,22 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq * struct ivpu_device *vdev = file_priv->vdev; int ret; - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) - ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->db_id, + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); else ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (!ret) - ivpu_dbg(vdev, JOB, "DB %d registered to ctx %d\n", cmdq->db_id, file_priv->ctx.id); + ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n", + cmdq->db_id, cmdq->id, file_priv->ctx.id); return ret; } static int -ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, u8 priority) +ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority) { struct ivpu_device *vdev = file_priv->vdev; struct vpu_job_queue_header *jobq_header; @@ -179,13 +183,18 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 eng cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); jobq_header = &cmdq->jobq->header; - jobq_header->engine_idx = engine; + jobq_header->engine_idx = VPU_ENGINE_COMPUTE; jobq_header->head = 0; jobq_header->tail = 0; + if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) { + ivpu_dbg(vdev, JOB, "Turbo mode enabled"); + jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE; + } + wmb(); /* Flush WC buffer for jobq->header */ - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { - ret = ivpu_hws_cmdq_init(file_priv, cmdq, engine, priority); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority); if (ret) return ret; } @@ -211,10 +220,10 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm cmdq->db_registered = false; - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) { - ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->db_id); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); if (!ret) - ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->db_id); + ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id); } ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); @@ -224,55 +233,46 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm return 0; } 
-static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine, - u8 priority) +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u8 priority) { - int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); - struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; int ret; lockdep_assert_held(&file_priv->lock); + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + if (cmdq->priority == priority) + break; + if (!cmdq) { cmdq = ivpu_cmdq_alloc(file_priv); if (!cmdq) return NULL; - file_priv->cmdq[cmdq_idx] = cmdq; + cmdq->priority = priority; } - ret = ivpu_cmdq_init(file_priv, cmdq, engine, priority); + ret = ivpu_cmdq_init(file_priv, cmdq, priority); if (ret) return NULL; return cmdq; } -static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine, u8 priority) +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) { - int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); - struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; lockdep_assert_held(&file_priv->lock); - if (cmdq) { - file_priv->cmdq[cmdq_idx] = NULL; + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) { + xa_erase(&file_priv->cmdq_xa, cmdq_id); ivpu_cmdq_fini(file_priv, cmdq); ivpu_cmdq_free(file_priv, cmdq); } } -void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) -{ - u16 engine; - u8 priority; - - lockdep_assert_held(&file_priv->lock); - - for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) - for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) - ivpu_cmdq_release_locked(file_priv, engine, priority); -} - /* * Mark the doorbell as unregistered * This function needs to be called when the VPU hardware is restarted @@ -281,20 +281,13 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) */ static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv) { - u16 engine; - u8 priority; + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; mutex_lock(&file_priv->lock); - for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) { - for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) { - int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); - struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx]; - - if (cmdq) - cmdq->db_registered = false; - } - } + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + cmdq->db_registered = false; mutex_unlock(&file_priv->lock); } @@ -314,17 +307,11 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv) { - u16 engine; - u8 priority; - - for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) { - for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) { - int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority); + struct ivpu_cmdq *cmdq; + unsigned long cmdq_id; - if (file_priv->cmdq[cmdq_idx]) - ivpu_cmdq_fini(file_priv, file_priv->cmdq[cmdq_idx]); - } - } + xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) + ivpu_cmdq_fini(file_priv, cmdq); } void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) @@ -335,7 +322,7 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv) ivpu_cmdq_fini_all(file_priv); - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_OS) + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) ivpu_jsm_context_release(vdev, file_priv->ctx.id); } @@ -349,24 +336,29 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) /* Check if there is space left in job queue */ if 
(next_entry == header->head) { - ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", - job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n", + job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail); return -EBUSY; } - entry = &cmdq->jobq->job[tail]; + entry = &cmdq->jobq->slot[tail].job; entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = job->job_id; entry->flags = 0; if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; - if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW && - (unlikely(!(ivpu_test_mode & IVPU_TEST_MODE_PREEMPTION_DISABLE)))) { - entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; - entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); - entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; - entry->secondary_preempt_buf_size = ivpu_bo_size(cmdq->secondary_preempt_buf); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { + if (cmdq->primary_preempt_buf) { + entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr; + entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf); + } + + if (cmdq->secondary_preempt_buf) { + entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr; + entry->secondary_preempt_buf_size = + ivpu_bo_size(cmdq->secondary_preempt_buf); + } } wmb(); /* Ensure that tail is updated after filling entry */ @@ -457,6 +449,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); + trace_job("create", job); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); return job; @@ -496,6 +489,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job->bos[CMD_BUF_IDX]->job_status = job_status; dma_fence_signal(job->done_fence); + trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); @@ -519,7 +513,6 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; - struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; bool is_first_job; int ret; @@ -530,7 +523,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) mutex_lock(&file_priv->lock); - cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx, priority); + cmdq = ivpu_cmdq_acquire(file_priv, priority); if (!cmdq) { ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", file_priv->ctx.id, job->engine_idx, priority); @@ -538,13 +531,11 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) goto err_unlock_file_priv; } - job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - xa_lock(&vdev->submitted_jobs_xa); is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); - if (ret) { + ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); + if (ret < 0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; @@ -566,6 +557,7 @@ 
static int ivpu_job_submit(struct ivpu_job *job, u8 priority) vdev->busy_start_ts = ktime_get(); } + trace_job("submit", job); ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); @@ -673,7 +665,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) int idx, ret; u8 priority; - if (params->engine > DRM_IVPU_ENGINE_COPY) + if (params->engine != DRM_IVPU_ENGINE_COMPUTE) return -EINVAL; if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 6accb94028c7..8b19e3f8b4cf 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -28,8 +28,10 @@ struct ivpu_cmdq { struct ivpu_bo *secondary_preempt_buf; struct ivpu_bo *mem; u32 entry_count; + u32 id; u32 db_id; bool db_registered; + u8 priority; }; /** diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 46ef16c3c069..30a40be76930 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -48,9 +48,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); - IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); @@ -131,7 +132,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.query_engine_hb.engine_idx = engine; @@ -154,7 +155,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_reset.engine_idx = engine; @@ -173,7 +174,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id struct vpu_jsm_msg resp; int ret; - if (engine > VPU_ENGINE_COPY) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.engine_preempt.engine_idx = engine; @@ -196,7 +197,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN); ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, - VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm); if (ret) ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); @@ -270,9 +271,8 @@ int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) req.payload.pwr_d0i3_enter.send_response = 1; - ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, - &resp, VPU_IPC_CHAN_GEN_CMD, - vdev->timeout.d0i3_entry_msg); + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp, + VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg); if (ret) return ret; @@ -346,7 +346,7 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 
engine) struct vpu_jsm_msg resp; int ret; - if (engine >= VPU_ENGINE_NB) + if (engine != VPU_ENGINE_COMPUTE) return -EINVAL; req.payload.hws_resume_engine.engine_idx = engine; @@ -394,8 +394,6 @@ int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u3 req.payload.hws_set_scheduling_log.host_ssid = host_ssid; req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va; req.payload.hws_set_scheduling_log.notify_index = 0; - req.payload.hws_set_scheduling_log.enable_extra_events = - ivpu_test_mode & IVPU_TEST_MODE_HWS_EXTRA_EVENTS; ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); @@ -430,8 +428,8 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) req.payload.hws_priority_band_setup.normal_band_percentage = 10; - ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, - &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP, + &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret); @@ -544,9 +542,8 @@ int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us req.payload.pwr_dct_control.dct_active_us = active_us; req.payload.pwr_dct_control.dct_inactive_us = inactive_us; - return ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, - &resp, VPU_IPC_CHAN_ASYNC_CMD, - vdev->timeout.jsm); + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); } int ivpu_jsm_dct_disable(struct ivpu_device *vdev) @@ -554,7 +551,14 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev) struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE }; struct vpu_jsm_msg resp; - return ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, - &resp, VPU_IPC_CHAN_ASYNC_CMD, - vdev->timeout.jsm); + return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); +} + +int ivpu_jsm_state_dump(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP }; + + return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD, + vdev->timeout.state_dump_msg); } diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index e4e42c0ff6e6..9e84d3526a14 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -43,4 +43,6 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas u64 buffer_size, u32 *sample_size, u64 *info_size); int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us); int ivpu_jsm_dct_disable(struct ivpu_device *vdev); +int ivpu_jsm_state_dump(struct ivpu_device *vdev); + #endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index c078e214b221..26ef52fbb93e 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -696,7 +696,7 @@ unlock: return ret; } -static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) +static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid) { struct ivpu_mmu_info *mmu = vdev->mmu; struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; @@ -708,30 +708,29 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) return 
-EINVAL; entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE); - - if (cd_dma != 0) { - cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | - FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | - FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | - IVPU_MMU_CD_0_TCR_EPD1 | - IVPU_MMU_CD_0_AA64 | - IVPU_MMU_CD_0_R | - IVPU_MMU_CD_0_ASET | - IVPU_MMU_CD_0_V; - cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; - cd[2] = 0; - cd[3] = 0x0000000000007444; - - /* For global context generate memory fault on VPU */ - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - cd[0] |= IVPU_MMU_CD_0_A; - } else { - memset(cd, 0, sizeof(cd)); - } + drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid); + + cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) | + FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) | + FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) | + IVPU_MMU_CD_0_TCR_EPD1 | + IVPU_MMU_CD_0_AA64 | + IVPU_MMU_CD_0_R | + IVPU_MMU_CD_0_ASET; + cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK; + cd[2] = 0; + cd[3] = 0x0000000000007444; + + /* For global context generate memory fault on VPU */ + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) + cd[0] |= IVPU_MMU_CD_0_A; + + if (valid) + cd[0] |= IVPU_MMU_CD_0_V; WRITE_ONCE(entry[1], cd[1]); WRITE_ONCE(entry[2], cd[2]); @@ -741,8 +740,8 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) if (!ivpu_is_force_snoop_enabled(vdev)) clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE); - ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", - cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); + ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + valid ? 
"valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); mutex_lock(&mmu->lock); if (!mmu->on) @@ -750,38 +749,18 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); if (ret) - goto unlock; + goto err_invalidate; ret = ivpu_mmu_cmdq_sync(vdev); + if (ret) + goto err_invalidate; unlock: mutex_unlock(&mmu->lock); - return ret; -} - -static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev) -{ - int ret; - - ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma); - if (ret) - ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret); - - return ret; -} - -static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma) -{ - int ret; - - if (ssid == 0) { - ivpu_err(vdev, "Invalid SSID: %u\n", ssid); - return -EINVAL; - } - - ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma); - if (ret) - ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret); + return 0; +err_invalidate: + WRITE_ONCE(entry[0], 0); + mutex_unlock(&mmu->lock); return ret; } @@ -808,12 +787,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev) return ret; } - ret = ivpu_mmu_cd_add_gbl(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret); - return ret; - } - ret = ivpu_mmu_enable(vdev); if (ret) { ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); @@ -966,12 +939,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val); } -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) { - return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma); + return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true); } -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid) +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid) { - ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */ + ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false); } diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index 6fa35c240710..7afea9cd8731 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -40,8 +40,8 @@ struct ivpu_mmu_info { int ivpu_mmu_init(struct ivpu_device *vdev); void ivpu_mmu_disable(struct ivpu_device *vdev); int ivpu_mmu_enable(struct ivpu_device *vdev); -int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); -void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid); +int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable); +void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid); int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index bbe652a7019d..0af614dfb6f9 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -90,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_ } } -static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) -{ - dma_addr_t pgd_dma; - - pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); - if (!pgtable->pgd_dma_ptr) - return -ENOMEM; - - pgtable->pgd_dma = pgd_dma; - - return 0; -} - static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable 
*pgtable) { int pgd_idx, pud_idx, pmd_idx; @@ -140,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt } ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); + pgtable->pgd_dma_ptr = NULL; + pgtable->pgd_dma = 0; +} + +static u64* +ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) +{ + u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr; + dma_addr_t pgd_dma; + + if (pgd_dma_ptr) + return pgd_dma_ptr; + + pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); + if (!pgd_dma_ptr) + return NULL; + + pgtable->pgd_dma_ptr = pgd_dma_ptr; + pgtable->pgd_dma = pgd_dma; + + return pgd_dma_ptr; } static u64* @@ -237,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr); + drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID); + + /* Allocate PGD - first level page table if needed */ + if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable)) + return -ENOMEM; + /* Allocate PUD - second level page table if needed */ if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx)) return -ENOMEM; @@ -418,6 +432,7 @@ int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt, bool llc_coherent) { + size_t start_vpu_addr = vpu_addr; struct scatterlist *sg; int ret; u64 prot; @@ -448,20 +463,36 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); - mutex_unlock(&ctx->lock); - return ret; + goto err_unmap_pages; } vpu_addr += size; } + if (!ctx->is_cd_valid) { + ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n", + ctx->id, ret); + goto err_unmap_pages; + } + ctx->is_cd_valid = true; + } + /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); - mutex_unlock(&ctx->lock); - ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret); + goto err_unmap_pages; + } + + mutex_unlock(&ctx->lock); + return 0; + +err_unmap_pages: + ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr); + mutex_unlock(&ctx->lock); return ret; } @@ -530,65 +561,79 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n mutex_unlock(&ctx->lock); } -static int -ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id) { u64 start, end; - int ret; mutex_init(&ctx->lock); - ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret); - return ret; - } - if (!context_id) { start = vdev->hw->ranges.global.start; end = vdev->hw->ranges.shave.end; } else { - start = vdev->hw->ranges.user.start; - end = vdev->hw->ranges.dma.end; + start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start); + end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end); } drm_mm_init(&ctx->mm, start, end - start); ctx->id = context_id; - - return 0; } -static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct 
ivpu_mmu_context *ctx) +void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr)) - return; + if (ctx->is_cd_valid) { + ivpu_mmu_cd_clear(vdev, ctx->id); + ctx->is_cd_valid = false; + } mutex_destroy(&ctx->lock); ivpu_mmu_pgtables_free(vdev, &ctx->pgtable); drm_mm_takedown(&ctx->mm); - - ctx->pgtable.pgd_dma_ptr = NULL; - ctx->pgtable.pgd_dma = 0; } -int ivpu_mmu_global_context_init(struct ivpu_device *vdev) +void ivpu_mmu_global_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); + ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID); } void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_context_fini(vdev, &vdev->gctx); + ivpu_mmu_context_fini(vdev, &vdev->gctx); } int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + int ret; + + ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); + + mutex_lock(&vdev->rctx.lock); + + if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) { + ivpu_err(vdev, "Failed to allocate root page table for reserved context\n"); + ret = -ENOMEM; + goto err_ctx_fini; + } + + ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable); + if (ret) { + ivpu_err(vdev, "Failed to set context descriptor for reserved context\n"); + goto err_ctx_fini; + } + + mutex_unlock(&vdev->rctx.lock); + return ret; + +err_ctx_fini: + mutex_unlock(&vdev->rctx.lock); + ivpu_mmu_context_fini(vdev, &vdev->rctx); + return ret; } void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) { - return ivpu_mmu_user_context_fini(vdev, &vdev->rctx); + ivpu_mmu_cd_clear(vdev, vdev->rctx.id); + ivpu_mmu_context_fini(vdev, &vdev->rctx); } void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) @@ -603,36 +648,3 @@ void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) xa_unlock(&vdev->context_xa); } - -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id) -{ - int ret; - - drm_WARN_ON(&vdev->drm, !ctx_id); - - ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); - if (ret) { - ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret); - return ret; - } - - ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); - if (ret) { - ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret); - goto err_context_fini; - } - - return 0; - -err_context_fini: - ivpu_mmu_context_fini(vdev, ctx); - return ret; -} - -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) -{ - drm_WARN_ON(&vdev->drm, !ctx->id); - - ivpu_mmu_clear_pgtable(vdev, ctx->id); - ivpu_mmu_context_fini(vdev, ctx); -} diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 7f9aaf3d10c2..8042fc067062 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -23,19 +23,20 @@ struct ivpu_mmu_pgtable { }; struct ivpu_mmu_context { - struct mutex lock; /* Protects: mm, pgtable */ + struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */ struct drm_mm mm; struct ivpu_mmu_pgtable pgtable; + bool is_cd_valid; u32 id; }; -int ivpu_mmu_global_context_init(struct ivpu_device *vdev); +void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id); +void 
ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_mmu_global_context_init(struct ivpu_device *vdev); void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); -int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); -void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index 2f9d37f5c208..ffe7b10f8a76 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -11,7 +11,7 @@ #include "ivpu_ms.h" #include "ivpu_pm.h" -#define MS_INFO_BUFFER_SIZE SZ_16K +#define MS_INFO_BUFFER_SIZE SZ_64K #define MS_NUM_BUFFERS 2 #define MS_READ_PERIOD_MULTIPLIER 2 #define MS_MIN_SAMPLE_PERIOD_NS 1000000 diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 59d3170f5e35..949f4233946c 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -9,21 +9,25 @@ #include <linux/pm_runtime.h> #include <linux/reboot.h> -#include "vpu_boot_api.h" +#include "ivpu_coredump.h" #include "ivpu_drv.h" -#include "ivpu_hw.h" #include "ivpu_fw.h" #include "ivpu_fw_log.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_ms.h" #include "ivpu_pm.h" +#include "ivpu_trace.h" +#include "vpu_boot_api.h" static bool ivpu_disable_recovery; +#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG) module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected"); +#endif static unsigned long ivpu_tdr_timeout_ms; module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); @@ -37,6 +41,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_cmdq_reset_all_contexts(vdev); ivpu_ipc_reset(vdev); + ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; } @@ -123,7 +128,8 @@ static void ivpu_pm_recovery_work(struct work_struct *work) if (ret) ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - ivpu_fw_log_dump(vdev); + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); @@ -195,6 +201,7 @@ int ivpu_pm_suspend_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); unsigned long timeout; + trace_pm("suspend"); ivpu_dbg(vdev, PM, "Suspend..\n"); timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); @@ -212,6 +219,7 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_pm_prepare_warm_boot(vdev); ivpu_dbg(vdev, PM, "Suspend done.\n"); + trace_pm("suspend done"); return 0; } @@ -222,6 +230,7 @@ int ivpu_pm_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("resume"); ivpu_dbg(vdev, PM, "Resume..\n"); ret = ivpu_resume(vdev); @@ -229,6 +238,7 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to resume: %d\n", ret); ivpu_dbg(vdev, PM, "Resume done.\n"); + trace_pm("resume done"); return ret; } @@ -243,6 +253,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) drm_WARN_ON(&vdev->drm, 
!xa_empty(&vdev->submitted_jobs_xa)); drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); + trace_pm("runtime suspend"); ivpu_dbg(vdev, PM, "Runtime suspend..\n"); ivpu_mmu_disable(vdev); @@ -262,13 +273,14 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) if (!is_idle || ret_d0i3) { ivpu_err(vdev, "Forcing cold boot due to previous errors\n"); atomic_inc(&vdev->pm->reset_counter); - ivpu_fw_log_dump(vdev); + ivpu_dev_coredump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); } ivpu_dbg(vdev, PM, "Runtime suspend done.\n"); + trace_pm("runtime suspend done"); return 0; } @@ -279,6 +291,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) struct ivpu_device *vdev = to_ivpu_device(drm); int ret; + trace_pm("runtime resume"); ivpu_dbg(vdev, PM, "Runtime resume..\n"); ret = ivpu_resume(vdev); @@ -286,6 +299,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); ivpu_dbg(vdev, PM, "Runtime resume done.\n"); + trace_pm("runtime resume done"); return ret; } @@ -364,6 +378,7 @@ void ivpu_pm_init(struct ivpu_device *vdev) pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, delay); + pm_runtime_set_active(dev); ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay); } @@ -378,7 +393,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; - pm_runtime_set_active(dev); pm_runtime_allow(dev); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); @@ -411,7 +425,7 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { - ivpu_err_ratelimited(vdev, "Filed to enable DCT: %d\n", ret); + ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } @@ -428,7 +442,7 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) ret = ivpu_jsm_dct_disable(vdev); if (ret) { - ivpu_err_ratelimited(vdev, "Filed to disable DCT: %d\n", ret); + ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 913669f1786e..616477fc17fa 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -6,6 +6,8 @@ #include <linux/device.h> #include <linux/err.h> +#include "ivpu_drv.h" +#include "ivpu_fw.h" #include "ivpu_hw.h" #include "ivpu_sysfs.h" @@ -39,8 +41,30 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b static DEVICE_ATTR_RO(npu_busy_time_us); +/** + * DOC: sched_mode + * + * The sched_mode is used to report current NPU scheduling mode. + * + * It returns following strings: + * - "HW" - Hardware Scheduler mode + * - "OS" - Operating System Scheduler mode + * + */ +static ssize_t +sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + + return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? 
"HW" : "OS"); +} + +static DEVICE_ATTR_RO(sched_mode); + static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, + &dev_attr_sched_mode.attr, NULL, }; diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h new file mode 100644 index 000000000000..eb792038e701 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __IVPU_TRACE_H__ + +#include <linux/tracepoint.h> +#include "ivpu_drv.h" +#include "ivpu_job.h" +#include "vpu_jsm_api.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ipc.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vpu +#define TRACE_INCLUDE_FILE ivpu_trace + +TRACE_EVENT(pm, + TP_PROTO(const char *event), + TP_ARGS(event), + TP_STRUCT__entry(__field(const char *, event)), + TP_fast_assign(__entry->event = event;), + TP_printk("%s", __entry->event) +); + +TRACE_EVENT(job, + TP_PROTO(const char *event, struct ivpu_job *job), + TP_ARGS(event, job), + TP_STRUCT__entry(__field(const char *, event) + __field(u32, ctx_id) + __field(u32, engine_id) + __field(u32, job_id) + ), + TP_fast_assign(__entry->event = event; + __entry->ctx_id = job->file_priv->ctx.id; + __entry->engine_id = job->engine_idx; + __entry->job_id = job->job_id;), + TP_printk("%s context:%d engine:%d job:%d", + __entry->event, + __entry->ctx_id, + __entry->engine_id, + __entry->job_id) +); + +TRACE_EVENT(jsm, + TP_PROTO(const char *event, struct vpu_jsm_msg *msg), + TP_ARGS(event, msg), + TP_STRUCT__entry(__field(const char *, event) + __field(const char *, type) + __field(enum vpu_ipc_msg_status, status) + __field(u32, request_id) + __field(u32, result) + ), + TP_fast_assign(__entry->event = event; + __entry->type = ivpu_jsm_msg_type_to_str(msg->type); + __entry->status = msg->status; + __entry->request_id = msg->request_id; + __entry->result = msg->result;), + TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x", + __entry->event, + __entry->type, + __entry->status, + __entry->request_id, + __entry->result) +); + +#endif /* __IVPU_TRACE_H__ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c new file mode 100644 index 000000000000..f8fb99de0de3 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "ivpu_trace.h" +#endif diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 82954b91b748..908e68ea1c39 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,14 +1,13 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (c) 2020-2023, Intel Corporation. + * Copyright (c) 2020-2024, Intel Corporation. */ #ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H /* - * =========== FW API version information beginning ================ - * The bellow values will be used to construct the version info this way: + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; * VPU_BOOT_API_VER_PATCH will be ignored. 
KMD and compatibility is not affected if this changes @@ -27,19 +26,18 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 24 +#define VPU_BOOT_API_VER_MINOR 26 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 0 +#define VPU_BOOT_API_VER_PATCH 3 /* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 -/* ------------ FW API version information end ---------------------*/ #pragma pack(push, 4) @@ -164,8 +162,6 @@ enum vpu_trace_destination { /* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ #define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT #define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST struct vpu_boot_l2_cache_config { u8 use; @@ -199,6 +195,17 @@ struct vpu_warm_boot_section { */ #define POWER_PROFILE_SURVIVABILITY 0x1 +/** + * Enum for dvfs_mode boot param. + */ +enum vpu_governor { + VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ + VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ + VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ + VPU_GOV_POWER_SAVE = 3, /* Power save governor */ + VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ +}; + struct vpu_boot_params { u32 magic; u32 vpu_id; @@ -301,7 +308,14 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ + /** + * DVFS Mode: + * 0 - Default, DVFS mode selected by the firmware + * 1 - Max Performance + * 2 - On Demand + * 3 - Power Save + * 4 - On Demand Priority Aware + */ u32 dvfs_mode; /** * Depending on DVFS Mode: @@ -332,8 +346,8 @@ struct vpu_boot_params { u64 d0i3_entry_vpu_ts; /* * The system time of the host operating system in microseconds. - * E.g the number of microseconds since 1st of January 1970, or whatever date the - * host operating system uses to maintain system time. + * E.g the number of microseconds since 1st of January 1970, or whatever + * date the host operating system uses to maintain system time. * This value will be used to track system time on the VPU. * The KMD is required to update this value on every VPU reset. */ @@ -382,10 +396,7 @@ struct vpu_boot_params { u32 pad6[734]; }; -/* - * Magic numbers set between host and vpu to detect corruptio of tracing init - */ - +/* Magic numbers set between host and vpu to detect corruption of tracing init */ #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE) /* Tracing buffer message format definitions */ @@ -405,7 +416,9 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - u32 pad_to_cache_line_size_0[14]; + /* keeps track of wrapping on the reader side */ + u32 read_wrap_count; + u32 pad_to_cache_line_size_0[13]; /* End of first cache line */ /** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 33f462b1a25d..7215c144158c 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. 
*/ -#define VPU_JSM_API_VER_MINOR 16 +#define VPU_JSM_API_VER_MINOR 25 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -36,7 +36,7 @@ /* * Number of Priority Bands for Hardware Scheduling - * Bands: RealTime, Focus, Normal, Idle + * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) */ #define VPU_HWS_NUM_PRIORITY_BANDS 4 @@ -74,6 +74,7 @@ #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU /* Job status returned when the job was preempted mid-inference */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU +#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU /* * Host <-> VPU IPC channels. @@ -86,18 +87,58 @@ /* * Job flags bit masks. */ -#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 -#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 +enum { + /* + * Null submission mask. + * When set, batch buffer's commands are not processed but returned as + * successful immediately, except fences and timestamps. + * When cleared, batch buffer's commands are processed normally. + * Used for testing and profiling purposes. + */ + VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), + /* + * Inline command mask. + * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). + * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). + */ + VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), + /* + * VPU private data mask. + * Reserved for the VPU to store private data about the job (or inline command) + * while being processed. + */ + VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U +}; /* - * Sizes of the reserved areas in jobs, in bytes. + * Job queue flags bit masks. */ -#define VPU_JOB_RESERVED_BYTES 8 +enum { + /* + * No job done notification mask. + * When set, indicates that no job done notification should be sent for any + * job from this queue. When cleared, indicates that job done notification + * should be sent for every job completed from this queue. + */ + VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), + /* + * Native fence usage mask. + * When set, indicates that job queue uses native fences (as inline commands + * in job queue). Such queues may also use legacy fences (as commands in batch buffers). + * When cleared, indicates the job queue only uses legacy fences. + * NOTE: For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + */ + VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), -/* - * Sizes of the reserved areas in job queues, in bytes. - */ -#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + /* + * Enable turbo mode for testing NPU performance; not recommended for regular usage. + */ + VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U) +}; /* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -141,23 +182,112 @@ #define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL /* + * Inline commands types. + */ +/* + * NOP. + * VPU does nothing other than consuming the inline command object. + */ +#define VPU_INLINE_CMD_TYPE_NOP 0x0 +/* + * Fence wait. + * VPU waits for the fence current value to reach monitored value. + * Fence wait operations are executed upon job dispatching. 
While waiting for + * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. + * Jobs present in the queue prior to the fence wait object may be processed + * concurrently. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 +/* + * Fence signal. + * VPU sets the fence current value to the provided value. If new current value + * is equal to or higher than monitored value, VPU sends fence signalled notification + * to the host. Fence signal operations are executed upon completion of all the jobs + * present in the queue prior to them, and in-order relative to each other in the queue. + * But jobs in-between them may be processed concurrently and may complete out-of-order. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 + +/* + * Job scheduling priority bands for both hardware scheduling and OS scheduling. + */ +enum vpu_job_scheduling_priority_band { + VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, + VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, + VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, + VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, + VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, +}; + +/* * Job format. + * Jobs defines the actual workloads to be executed by a given engine. */ struct vpu_job_queue_entry { - u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ - u32 job_id; /**< Job ID */ - u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u64 root_page_table_addr; /**< Address of root page table to use for this job */ - u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 primary_preempt_buf_addr; + /**< Address of VPU commands batch buffer */ + u64 batch_buf_addr; + /**< Job ID */ + u32 job_id; + /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u32 flags; + /** + * Doorbell ring timestamp taken by KMD from SoC's global system clock, in + * microseconds. NPU can convert this value to its own fixed clock's timebase, + * to match other profiling timestamps. + */ + u64 doorbell_timestamp; + /**< Extra id for job tracking, used only in the firmware perf traces */ + u64 host_tracking_id; /**< Address of the primary preemption buffer to use for this job */ - u32 primary_preempt_buf_size; + u64 primary_preempt_buf_addr; /**< Size of the primary preemption buffer to use for this job */ - u32 secondary_preempt_buf_size; + u32 primary_preempt_buf_size; /**< Size of secondary preemption buffer to use for this job */ - u64 secondary_preempt_buf_addr; + u32 secondary_preempt_buf_size; /**< Address of secondary preemption buffer to use for this job */ - u8 reserved_0[VPU_JOB_RESERVED_BYTES]; + u64 secondary_preempt_buf_addr; + u64 reserved_0; +}; + +/* + * Inline command format. + * Inline commands are the commands executed at scheduler level (typically, + * synchronization directives). Inline command and job objects must be of + * the same size and have flags field at same offset. + */ +struct vpu_inline_cmd { + u64 reserved_0; + /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ + u32 type; + /* Flags bit field, see VPU_JOB_FLAGS_* above. */ + u32 flags; + /* Inline command payload. Depends on inline command type. */ + union { + /* Fence (wait and signal) commands' payload. */ + struct { + /* Fence object handle. */ + u64 fence_handle; + /* User VA of the current fence value. */ + u64 current_value_va; + /* User VA of the monitored fence value (read-only). */ + u64 monitored_value_va; + /* Value to wait for or write in fence location. 
*/ + u64 value; + /* User VA of the log buffer in which to add log entry on completion. */ + u64 log_buffer_va; + } fence; + /* Other commands do not have a payload. */ + /* Payload definition for future inline commands can be inserted here. */ + u64 reserved_1[6]; + } payload; +}; + +/* + * Job queue slots can be populated either with job objects or inline command objects. + */ +union vpu_jobq_slot { + struct vpu_job_queue_entry job; + struct vpu_inline_cmd inline_cmd; }; /* @@ -167,7 +297,21 @@ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; + u32 flags; + /* Set to 1 to indicate priority_band field is valid */ + u32 priority_band_valid; + /* + * Priority for the work of this job queue, valid only if the HWS is NOT used + * and the `priority_band_valid` is set to 1. It is applied only during + * the VPU_JSM_MSG_REGISTER_DB message processing. + * The device firmware might use the `priority_band` to optimize the power + * management logic, but it will not affect the order of jobs. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ + u32 priority_band; + /* Inside realtime band assigns a further priority, limited to 0..31 range */ + u32 realtime_priority_level; + u32 reserved_0[9]; }; /* @@ -175,7 +319,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - struct vpu_job_queue_entry job[]; + union vpu_jobq_slot slot[]; }; /** @@ -197,9 +341,7 @@ enum vpu_trace_entity_type { struct vpu_hws_log_buffer_header { /* Written by VPU after adding a log entry. Initialised by host to 0. */ u32 first_free_entry_index; - /* Incremented by VPU every time the VPU overwrites the 0th entry; - * initialised by host to 0. - */ + /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ u32 wraparound_count; /* * This is the number of buffers that can be stored in the log buffer provided by the host. @@ -230,14 +372,80 @@ struct vpu_hws_log_buffer_entry { u64 operation_data[2]; }; +/* Native fence log buffer types. */ +enum vpu_hws_native_fence_log_type { + VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, + VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 +}; + +/* HWS native fence log buffer header. */ +struct vpu_hws_native_fence_log_header { + union { + struct { + /* Index of the first free entry in buffer. */ + u32 first_free_entry_idx; + /* Incremented each time NPU wraps around the buffer to write next entry. */ + u32 wraparound_count; + }; + /* Field allowing atomic update of both fields above. */ + u64 atomic_wraparound_and_entry_idx; + }; + /* Log buffer type, see enum vpu_hws_native_fence_log_type. */ + u64 type; + /* Allocated number of entries in the log buffer. */ + u64 entry_nb; + u64 reserved[2]; +}; + +/* Native fence log operation types. */ +enum vpu_hws_native_fence_log_op { + VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, + VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 +}; + +/* HWS native fence log entry. */ +struct vpu_hws_native_fence_log_entry { + /* Newly signaled/unblocked fence value. */ + u64 fence_value; + /* Native fence object handle to which this operation belongs. */ + u64 fence_handle; + /* Operation type, see enum vpu_hws_native_fence_log_op. */ + u64 op_type; + u64 reserved_0; + /* + * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence + * wait was started (in NPU SysTime). + */ + u64 fence_wait_start_ts; + u64 reserved_1; + /* Timestamp at which fence operation was completed (in NPU SysTime). 
*/ + u64 fence_end_ts; +}; + +/* Native fence log buffer. */ +struct vpu_hws_native_fence_log_buffer { + struct vpu_hws_native_fence_log_header header; + struct vpu_hws_native_fence_log_entry entry[]; +}; + /* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + /** + * Preempt engine. The NPU stops (preempts) all the jobs currently + * executing on the target engine making the engine become idle and ready to + * execute new jobs. + * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of + * the target engine, but it stops processing them (until the queue doorbell + * is rung again); the host is responsible to reset the job queue, either + * after preemption or when resubmitting jobs to the queue. + */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, VPU_JSM_MSG_REGISTER_DB = 0x1102, VPU_JSM_MSG_UNREGISTER_DB = 0x1103, @@ -323,9 +531,10 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC commands before this one. * */ VPU_JSM_MSG_STATE_DUMP = 0x11FF, + /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. @@ -335,8 +544,12 @@ enum vpu_ipc_msg_type { * Perform the save procedure for the D0i3 entry */ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, + /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Fence signalled */ + VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, + /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, @@ -422,6 +635,7 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC responses before this one. * */ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, + /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, @@ -600,11 +814,6 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; }; -struct vpu_ipc_msg_payload_blob_deinit { - /* 64-bit unique ID for the blob to be de-initialized. */ - u64 blob_id; -}; - struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ u32 engine_idx; @@ -622,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done { u64 cmdq_id; }; +/* + * Notification message upon native fence signalling. + * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED + */ +struct vpu_ipc_msg_payload_native_fence_signalled { + /* Engine ID. */ + u32 engine_idx; + /* Host SSID. */ + u32 host_ssid; + /* CMDQ ID */ + u64 cmdq_id; + /* Fence object handle. */ + u64 fence_handle; +}; + struct vpu_jsm_engine_reset_context { /* Host SSID */ u32 host_ssid; @@ -700,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; }; -struct vpu_ipc_msg_payload_blob_deinit_done { - /* 64-bit unique ID for the blob de-initialized. 
*/ - u64 blob_id; -}; - /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* @@ -794,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Priority band to assign to work of this context */ + /* + * Priority band to assign to work of this context. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ u32 priority_band; /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; @@ -869,9 +1091,7 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { */ u64 notify_index; /* - * Enable extra events to be output to log for debug of scheduling algorithm. - * Interpreted by VPU as a boolean to enable or disable, expected values are - * 0 and 1. + * Field is now deprecated, will be removed when KMD is updated to support removal */ u32 enable_extra_events; /* Zero Padding */ @@ -1243,10 +1463,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; - struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -1254,7 +1474,6 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; - struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name; diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c index ada9b1eb0787..8ab82e78dd94 100644 --- a/drivers/accel/qaic/mhi_controller.c +++ b/drivers/accel/qaic/mhi_controller.c @@ -405,6 +405,38 @@ static const struct mhi_channel_config aic100_channels[] = { .auto_queue = false, .wake_capable = false, }, + { + .name = "IPCR", + .num = 24, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_TO_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = false, + .wake_capable = false, + }, + { + .name = "IPCR", + .num = 25, + .num_elements = 32, + .local_elements = 0, + .event_ring = 0, + .dir = DMA_FROM_DEVICE, + .ee_mask = MHI_CH_EE_AMSS, + .pollcfg = 0, + .doorbell = MHI_DB_BRST_DISABLE, + .lpm_notify = false, + .offload_channel = false, + .doorbell_mode_switch = false, + .auto_queue = true, + .wake_capable = false, + }, }; static struct mhi_event_config aic100_events[] = { diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index 20b653d99e52..ba0cf2f94732 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -64,20 +64,9 @@ static int bootlog_show(struct seq_file *s, void *unused) return 0; } -static int 
bootlog_fops_open(struct inode *inode, struct file *file) -{ - return single_open(file, bootlog_show, inode->i_private); -} - -static const struct file_operations bootlog_fops = { - .owner = THIS_MODULE, - .open = bootlog_fops_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(bootlog); -static int read_dbc_fifo_size(struct seq_file *s, void *unused) +static int fifo_size_show(struct seq_file *s, void *unused) { struct dma_bridge_chan *dbc = s->private; @@ -85,20 +74,9 @@ static int read_dbc_fifo_size(struct seq_file *s, void *unused) return 0; } -static int fifo_size_open(struct inode *inode, struct file *file) -{ - return single_open(file, read_dbc_fifo_size, inode->i_private); -} - -static const struct file_operations fifo_size_fops = { - .owner = THIS_MODULE, - .open = fifo_size_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(fifo_size); -static int read_dbc_queued(struct seq_file *s, void *unused) +static int queued_show(struct seq_file *s, void *unused) { struct dma_bridge_chan *dbc = s->private; u32 tail = 0, head = 0; @@ -115,18 +93,7 @@ static int read_dbc_queued(struct seq_file *s, void *unused) return 0; } -static int queued_open(struct inode *inode, struct file *file) -{ - return single_open(file, read_dbc_queued, inode->i_private); -} - -static const struct file_operations queued_fops = { - .owner = THIS_MODULE, - .open = queued_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(queued); void qaic_debugfs_init(struct qaic_drm_device *qddev) { diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index bf10156c334e..4ddf89308ff5 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -32,8 +32,9 @@ #include "qaic_timesync.h" #include "sahara.h" -MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS("DMA_BUF"); +#define PCI_DEV_AIC080 0xa080 #define PCI_DEV_AIC100 0xa100 #define QAIC_NAME "qaic" #define QAIC_DESC "Qualcomm Cloud AI Accelerators" @@ -53,12 +54,12 @@ static void qaicm_wq_release(struct drm_device *dev, void *res) destroy_workqueue(wq); } -static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *fmt) +static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *name) { struct workqueue_struct *wq; int ret; - wq = alloc_workqueue(fmt, WQ_UNBOUND, 0); + wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name); if (!wq) return ERR_PTR(-ENOMEM); ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq); @@ -365,7 +366,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de return NULL; qdev->dev_state = QAIC_OFFLINE; - if (id->device == PCI_DEV_AIC100) { + if (id->device == PCI_DEV_AIC080 || id->device == PCI_DEV_AIC100) { qdev->num_dbc = 16; qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); if (!qdev->dbc) @@ -607,6 +608,7 @@ static struct mhi_driver qaic_mhi_driver = { }; static const struct pci_device_id qaic_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC080), }, { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), }, { } }; diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c index bf94bbab6be5..6d772143d612 100644 --- a/drivers/accel/qaic/sahara.c +++ b/drivers/accel/qaic/sahara.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ +#include <linux/devcoredump.h> #include <linux/firmware.h> #include <linux/limits.h> #include <linux/mhi.h> @@ -9,6 +10,7 @@ #include <linux/mod_devicetable.h> #include <linux/overflow.h> #include <linux/types.h> +#include <linux/vmalloc.h> #include <linux/workqueue.h> #include "sahara.h" @@ -36,12 +38,14 @@ #define SAHARA_PACKET_MAX_SIZE 0xffffU /* MHI_MAX_MTU */ #define SAHARA_TRANSFER_MAX_SIZE 0x80000 +#define SAHARA_READ_MAX_SIZE 0xfff0U /* Avoid unaligned requests */ #define SAHARA_NUM_TX_BUF DIV_ROUND_UP(SAHARA_TRANSFER_MAX_SIZE,\ SAHARA_PACKET_MAX_SIZE) #define SAHARA_IMAGE_ID_NONE U32_MAX #define SAHARA_VERSION 2 #define SAHARA_SUCCESS 0 +#define SAHARA_TABLE_ENTRY_STR_LEN 20 #define SAHARA_MODE_IMAGE_TX_PENDING 0x0 #define SAHARA_MODE_IMAGE_TX_COMPLETE 0x1 @@ -53,6 +57,8 @@ #define SAHARA_END_OF_IMAGE_LENGTH 0x10 #define SAHARA_DONE_LENGTH 0x8 #define SAHARA_RESET_LENGTH 0x8 +#define SAHARA_MEM_DEBUG64_LENGTH 0x18 +#define SAHARA_MEM_READ64_LENGTH 0x18 struct sahara_packet { __le32 cmd; @@ -80,18 +86,95 @@ struct sahara_packet { __le32 image; __le32 status; } end_of_image; + struct { + __le64 table_address; + __le64 table_length; + } memory_debug64; + struct { + __le64 memory_address; + __le64 memory_length; + } memory_read64; }; }; +struct sahara_debug_table_entry64 { + __le64 type; + __le64 address; + __le64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +struct sahara_dump_table_entry { + u64 type; + u64 address; + u64 length; + char description[SAHARA_TABLE_ENTRY_STR_LEN]; + char filename[SAHARA_TABLE_ENTRY_STR_LEN]; +}; + +#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdef +#define SAHARA_DUMP_V1_VER 1 +struct sahara_memory_dump_meta_v1 { + u64 magic; + u64 version; + u64 dump_size; + u64 table_size; +}; + +/* + * Layout of crashdump provided to user via devcoredump + * +------------------------------------------+ + * | Crashdump Meta structure | + * | type: struct sahara_memory_dump_meta_v1 | + * +------------------------------------------+ + * | Crashdump Table | + * | type: array of struct | + * | sahara_dump_table_entry | + * | | + * | | + * +------------------------------------------+ + * | Crashdump | + * | | + * | | + * | | + * | | + * | | + * +------------------------------------------+ + * + * First is the metadata header. Userspace can use the magic number to verify + * the content type, and then check the version for the rest of the format. + * New versions should keep the magic number location/value, and version + * location, but increment the version value. + * + * For v1, the metadata lists the size of the entire dump (header + table + + * dump) and the size of the table. Then the dump image table, which describes + * the contents of the dump. Finally all the images are listed in order, with + * no deadspace in between. Userspace can use the sizes listed in the image + * table to reconstruct the individual images. 
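+ *
+ * A minimal userspace sketch of consuming this v1 layout (illustrative
+ * only, not part of the driver; assumes "dump" points at the devcoredump
+ * contents read into memory and write_file() is a hypothetical helper):
+ *
+ *	struct sahara_memory_dump_meta_v1 *meta = (void *)dump;
+ *	struct sahara_dump_table_entry *table = (void *)(dump + sizeof(*meta));
+ *	u64 nents = meta->table_size / sizeof(*table);
+ *	u8 *data = dump + sizeof(*meta) + meta->table_size;
+ *	u64 i;
+ *
+ *	if (meta->magic != SAHARA_DUMP_V1_MAGIC || meta->version != SAHARA_DUMP_V1_VER)
+ *		return -EINVAL;
+ *	for (i = 0; i < nents; i++) {
+ *		write_file(table[i].filename, data, table[i].length);
+ *		data += table[i].length;
+ *	}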
+ */ + struct sahara_context { struct sahara_packet *tx[SAHARA_NUM_TX_BUF]; struct sahara_packet *rx; - struct work_struct work; + struct work_struct fw_work; + struct work_struct dump_work; struct mhi_device *mhi_dev; const char **image_table; u32 table_size; u32 active_image_id; const struct firmware *firmware; + u64 dump_table_address; + u64 dump_table_length; + size_t rx_size; + size_t rx_size_requested; + void *mem_dump; + size_t mem_dump_sz; + struct sahara_dump_table_entry *dump_image; + u64 dump_image_offset; + void *mem_dump_freespace; + u64 dump_images_left; + bool is_mem_dump_mode; }; static const char *aic100_image_table[] = { @@ -153,6 +236,8 @@ static void sahara_send_reset(struct sahara_context *context) { int ret; + context->is_mem_dump_mode = false; + context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD); context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH); @@ -186,7 +271,8 @@ static void sahara_hello(struct sahara_context *context) } if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING && - le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE) { + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE && + le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) { dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n", le32_to_cpu(context->rx->hello.mode)); return; @@ -320,9 +406,70 @@ static void sahara_end_of_image(struct sahara_context *context) dev_dbg(&context->mhi_dev->dev, "Unable to send done response %d\n", ret); } +static void sahara_memory_debug64(struct sahara_context *context) +{ + int ret; + + dev_dbg(&context->mhi_dev->dev, + "MEMORY DEBUG64 cmd received. length:%d table_address:%#llx table_length:%#llx\n", + le32_to_cpu(context->rx->length), + le64_to_cpu(context->rx->memory_debug64.table_address), + le64_to_cpu(context->rx->memory_debug64.table_length)); + + if (le32_to_cpu(context->rx->length) != SAHARA_MEM_DEBUG64_LENGTH) { + dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - length %d\n", + le32_to_cpu(context->rx->length)); + return; + } + + context->dump_table_address = le64_to_cpu(context->rx->memory_debug64.table_address); + context->dump_table_length = le64_to_cpu(context->rx->memory_debug64.table_length); + + if (context->dump_table_length % sizeof(struct sahara_debug_table_entry64) != 0 || + !context->dump_table_length) { + dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - table length %lld\n", + context->dump_table_length); + return; + } + + /* + * From this point, the protocol flips. We make memory_read requests to + * the device, and the device responds with the raw data. If the device + * has an error, it will send an End of Image command. First we need to + * request the memory dump table so that we know where all the pieces + * of the dump are that we can consume. + */ + + context->is_mem_dump_mode = true; + + /* + * Assume that the table is smaller than our MTU so that we can read it + * in one shot. The spec does not put an upper limit on the table, but + * no known device will exceed this. + */ + if (context->dump_table_length > SAHARA_PACKET_MAX_SIZE) { + dev_err(&context->mhi_dev->dev, "Memory dump table length %lld exceeds supported size. 
Discarding dump\n", + context->dump_table_length); + sahara_send_reset(context); + return; + } + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_table_address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(context->dump_table_length); + + context->rx_size_requested = context->dump_table_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump table %d\n", ret); +} + static void sahara_processing(struct work_struct *work) { - struct sahara_context *context = container_of(work, struct sahara_context, work); + struct sahara_context *context = container_of(work, struct sahara_context, fw_work); int ret; switch (le32_to_cpu(context->rx->cmd)) { @@ -338,6 +485,12 @@ static void sahara_processing(struct work_struct *work) case SAHARA_DONE_RESP_CMD: /* Intentional do nothing as we don't need to exit an app */ break; + case SAHARA_RESET_RESP_CMD: + /* Intentional do nothing as we don't need to exit an app */ + break; + case SAHARA_MEM_DEBUG64_CMD: + sahara_memory_debug64(context); + break; default: dev_err(&context->mhi_dev->dev, "Unknown command %d\n", le32_to_cpu(context->rx->cmd)); @@ -350,6 +503,217 @@ static void sahara_processing(struct work_struct *work) dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); } +static void sahara_parse_dump_table(struct sahara_context *context) +{ + struct sahara_dump_table_entry *image_out_table; + struct sahara_debug_table_entry64 *dev_table; + struct sahara_memory_dump_meta_v1 *dump_meta; + u64 table_nents; + u64 dump_length; + int ret; + u64 i; + + table_nents = context->dump_table_length / sizeof(*dev_table); + context->dump_images_left = table_nents; + dump_length = 0; + + dev_table = (struct sahara_debug_table_entry64 *)(context->rx); + for (i = 0; i < table_nents; ++i) { + /* Do not trust the device, ensure the strings are terminated */ + dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0; + + dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + dev_dbg(&context->mhi_dev->dev, + "Memory dump table entry %lld type: %lld address: %#llx length: %#llx description: \"%s\" filename \"%s\"\n", + i, + le64_to_cpu(dev_table[i].type), + le64_to_cpu(dev_table[i].address), + le64_to_cpu(dev_table[i].length), + dev_table[i].description, + dev_table[i].filename); + } + + dump_length = size_add(dump_length, sizeof(*dump_meta)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents)); + if (dump_length == SIZE_MAX) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + context->mem_dump_sz = dump_length; + context->mem_dump = vzalloc(dump_length); + if (!context->mem_dump) { + /* Discard the dump */ + sahara_send_reset(context); + return; + } + + /* Populate the dump metadata and table for userspace */ + dump_meta = context->mem_dump; + dump_meta->magic = SAHARA_DUMP_V1_MAGIC; + dump_meta->version = SAHARA_DUMP_V1_VER; + dump_meta->dump_size = dump_length; + dump_meta->table_size = context->dump_table_length; 
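+	/*
+	 * The v1 header is now populated. Next, copy the device's table into
+	 * the dump in host byte order and leave mem_dump_freespace pointing
+	 * at the byte where the first image's data will land.
+	 */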
+ + image_out_table = context->mem_dump + sizeof(*dump_meta); + for (i = 0; i < table_nents; ++i) { + image_out_table[i].type = le64_to_cpu(dev_table[i].type); + image_out_table[i].address = le64_to_cpu(dev_table[i].address); + image_out_table[i].length = le64_to_cpu(dev_table[i].length); + strscpy(image_out_table[i].description, dev_table[i].description, + SAHARA_TABLE_ENTRY_STR_LEN); + strscpy(image_out_table[i].filename, + dev_table[i].filename, + SAHARA_TABLE_ENTRY_STR_LEN); + } + + context->mem_dump_freespace = &image_out_table[i]; + + /* Done parsing the table, switch to image dump mode */ + context->dump_table_length = 0; + + /* Request the first chunk of the first image */ + context->dump_image = &image_out_table[0]; + dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->dump_image_offset = dump_length; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_image->address); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to send read for dump content %d\n", ret); +} + +static void sahara_parse_dump_image(struct sahara_context *context) +{ + u64 dump_length; + int ret; + + memcpy(context->mem_dump_freespace, context->rx, context->rx_size); + context->mem_dump_freespace += context->rx_size; + + if (context->dump_image_offset >= context->dump_image->length) { + /* Need to move to next image */ + context->dump_image++; + context->dump_images_left--; + context->dump_image_offset = 0; + + if (!context->dump_images_left) { + /* Dump done */ + dev_coredumpv(context->mhi_dev->mhi_cntrl->cntrl_dev, + context->mem_dump, + context->mem_dump_sz, + GFP_KERNEL); + context->mem_dump = NULL; + sahara_send_reset(context); + return; + } + } + + /* Get next image chunk */ + dump_length = context->dump_image->length - context->dump_image_offset; + dump_length = min(dump_length, SAHARA_READ_MAX_SIZE); + /* Avoid requesting EOI sized data so that we can identify errors */ + if (dump_length == SAHARA_END_OF_IMAGE_LENGTH) + dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2; + + context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD); + context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH); + context->tx[0]->memory_read64.memory_address = + cpu_to_le64(context->dump_image->address + context->dump_image_offset); + context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length); + + context->dump_image_offset += dump_length; + context->rx_size_requested = dump_length; + + ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0], + SAHARA_MEM_READ64_LENGTH, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, + "Unable to send read for dump content %d\n", ret); +} + +static void sahara_dump_processing(struct work_struct *work) +{ + struct sahara_context *context = container_of(work, struct sahara_context, dump_work); + int ret; + + /* + * We should get the expected raw data, but if the device has an error + * it is supposed to send EOI with an error code. 
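+ * Any other size mismatch, or an EOI-sized packet that does not actually
+ * carry the EOI command, is treated as a protocol error. In every error
+ * case the partial dump is discarded and the device is reset.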
+ */ + if (context->rx_size != context->rx_size_requested && + context->rx_size != SAHARA_END_OF_IMAGE_LENGTH) { + dev_err(&context->mhi_dev->dev, + "Unexpected response to read_data. Expected size: %#zx got: %#zx\n", + context->rx_size_requested, + context->rx_size); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) == SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Unexpected EOI response to read_data. Status: %d\n", + le32_to_cpu(context->rx->end_of_image.status)); + goto error; + } + + if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH && + le32_to_cpu(context->rx->cmd) != SAHARA_END_OF_IMAGE_CMD) { + dev_err(&context->mhi_dev->dev, + "Invalid EOI response to read_data. CMD: %d\n", + le32_to_cpu(context->rx->cmd)); + goto error; + } + + /* + * Need to know if we received the dump table, or part of a dump image. + * Since we get raw data, we cannot tell from the data itself. Instead, + * we use the stored dump_table_length, which we zero after we read and + * process the entire table. + */ + if (context->dump_table_length) + sahara_parse_dump_table(context); + else + sahara_parse_dump_image(context); + + ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx, + SAHARA_PACKET_MAX_SIZE, MHI_EOT); + if (ret) + dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret); + + return; + +error: + vfree(context->mem_dump); + context->mem_dump = NULL; + sahara_send_reset(context); +} + static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { struct sahara_context *context; @@ -382,7 +746,8 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_ } context->mhi_dev = mhi_dev; - INIT_WORK(&context->work, sahara_processing); + INIT_WORK(&context->fw_work, sahara_processing); + INIT_WORK(&context->dump_work, sahara_dump_processing); context->image_table = aic100_image_table; context->table_size = ARRAY_SIZE(aic100_image_table); context->active_image_id = SAHARA_IMAGE_ID_NONE; @@ -405,7 +770,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev) { struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); - cancel_work_sync(&context->work); + cancel_work_sync(&context->fw_work); + cancel_work_sync(&context->dump_work); + if (context->mem_dump) + vfree(context->mem_dump); sahara_release_image(context); mhi_unprepare_from_transfer(mhi_dev); } @@ -418,8 +786,14 @@ static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result { struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev); - if (!mhi_result->transaction_status) - schedule_work(&context->work); + if (!mhi_result->transaction_status) { + context->rx_size = mhi_result->bytes_xferd; + if (context->is_mem_dump_mode) + schedule_work(&context->dump_work); + else + schedule_work(&context->fw_work); + } + } static const struct mhi_device_id sahara_mhi_match_table[] = { |