path: root/drivers/accel
Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/habanalabs/common/memory.c  |   2
-rw-r--r--  drivers/accel/ivpu/Kconfig                |  10
-rw-r--r--  drivers/accel/ivpu/Makefile               |   8
-rw-r--r--  drivers/accel/ivpu/ivpu_coredump.c        |  39
-rw-r--r--  drivers/accel/ivpu/ivpu_coredump.h        |  25
-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c         |  86
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c             |  70
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h             |  35
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.c              |  37
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.h              |   9
-rw-r--r--  drivers/accel/ivpu/ivpu_fw_log.c          | 113
-rw-r--r--  drivers/accel/ivpu/ivpu_fw_log.h          |  17
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.c             |   5
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.c              |  15
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h              |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_40xx_reg.h     |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_btrs.c         |  21
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_ip.c           |  57
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.c             |  45
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.h             |   9
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c             | 190
-rw-r--r--  drivers/accel/ivpu/ivpu_job.h             |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_jsm_msg.c         |  42
-rw-r--r--  drivers/accel/ivpu/ivpu_jsm_msg.h         |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.c             | 101
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.h             |   4
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.c     | 162
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.h     |   9
-rw-r--r--  drivers/accel/ivpu/ivpu_ms.c              |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c              |  28
-rw-r--r--  drivers/accel/ivpu/ivpu_sysfs.c           |  24
-rw-r--r--  drivers/accel/ivpu/ivpu_trace.h           |  73
-rw-r--r--  drivers/accel/ivpu/ivpu_trace_points.c    |   9
-rw-r--r--  drivers/accel/ivpu/vpu_boot_api.h         |  45
-rw-r--r--  drivers/accel/ivpu/vpu_jsm_api.h          | 303
-rw-r--r--  drivers/accel/qaic/mhi_controller.c       |  32
-rw-r--r--  drivers/accel/qaic/qaic_debugfs.c         |  43
-rw-r--r--  drivers/accel/qaic/qaic_drv.c             |  10
-rw-r--r--  drivers/accel/qaic/sahara.c               | 388
39 files changed, 1475 insertions, 600 deletions
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 3348ad12c237..601fdbe70179 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -14,7 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/pci-p2pdma.h>
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
#define HL_MMU_DEBUG 0
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 682c53245286..9e055b5ce03d 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -8,6 +8,7 @@ config DRM_ACCEL_IVPU
select FW_LOADER
select DRM_GEM_SHMEM_HELPER
select GENERIC_ALLOCATOR
+ select WANT_DEV_COREDUMP
help
Choose this option if you have a system with an 14th generation
Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
@@ -15,3 +16,12 @@ config DRM_ACCEL_IVPU
and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.
+
+config DRM_ACCEL_IVPU_DEBUG
+ bool "Intel NPU debug mode"
+ depends on DRM_ACCEL_IVPU
+ help
+ Choose this option to enable additional
+ debug features for the Intel NPU driver:
+ - Always print debug messages regardless of dyndbg config,
+ - Enable unsafe module params.
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index ebd682a42eb1..1029e0bab061 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -16,8 +16,14 @@ intel_vpu-y := \
ivpu_mmu_context.o \
ivpu_ms.o \
ivpu_pm.o \
- ivpu_sysfs.o
+ ivpu_sysfs.o \
+ ivpu_trace_points.o
intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
+intel_vpu-$(CONFIG_DEV_COREDUMP) += ivpu_coredump.o
obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
+
+subdir-ccflags-$(CONFIG_DRM_ACCEL_IVPU_DEBUG) += -DDEBUG
+
+CFLAGS_ivpu_trace_points.o = -I$(src)
diff --git a/drivers/accel/ivpu/ivpu_coredump.c b/drivers/accel/ivpu/ivpu_coredump.c
new file mode 100644
index 000000000000..16ad0c30818c
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#include <linux/devcoredump.h>
+#include <linux/firmware.h>
+
+#include "ivpu_coredump.h"
+#include "ivpu_fw.h"
+#include "ivpu_gem.h"
+#include "vpu_boot_api.h"
+
+#define CRASH_DUMP_HEADER "Intel NPU crash dump"
+#define CRASH_DUMP_HEADERS_SIZE SZ_4K
+
+void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+ struct drm_print_iterator pi = {};
+ struct drm_printer p;
+ size_t coredump_size;
+ char *coredump;
+
+ coredump_size = CRASH_DUMP_HEADERS_SIZE + FW_VERSION_HEADER_SIZE +
+ ivpu_bo_size(vdev->fw->mem_log_crit) + ivpu_bo_size(vdev->fw->mem_log_verb);
+ coredump = vmalloc(coredump_size);
+ if (!coredump)
+ return;
+
+ pi.data = coredump;
+ pi.remain = coredump_size;
+ p = drm_coredump_printer(&pi);
+
+ drm_printf(&p, "%s\n", CRASH_DUMP_HEADER);
+ drm_printf(&p, "FW version: %s\n", vdev->fw->version);
+ ivpu_fw_log_print(vdev, false, &p);
+
+ dev_coredumpv(vdev->drm.dev, coredump, pi.offset, GFP_KERNEL);
+}
diff --git a/drivers/accel/ivpu/ivpu_coredump.h b/drivers/accel/ivpu/ivpu_coredump.h
new file mode 100644
index 000000000000..8efb09d02441
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_coredump.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __IVPU_COREDUMP_H__
+#define __IVPU_COREDUMP_H__
+
+#include <drm/drm_print.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_fw_log.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+void ivpu_dev_coredump(struct ivpu_device *vdev);
+#else
+static inline void ivpu_dev_coredump(struct ivpu_device *vdev)
+{
+ struct drm_printer p = drm_info_printer(vdev->drm.dev);
+
+ ivpu_fw_log_print(vdev, false, &p);
+}
+#endif
+
+#endif /* __IVPU_COREDUMP_H__ */
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 8d50981594d1..8180b95ed69d 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -45,6 +45,14 @@ static int fw_name_show(struct seq_file *s, void *v)
return 0;
}
+static int fw_version_show(struct seq_file *s, void *v)
+{
+ struct ivpu_device *vdev = seq_to_ivpu(s);
+
+ seq_printf(s, "%s\n", vdev->fw->version);
+ return 0;
+}
+
static int fw_trace_capability_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = seq_to_ivpu(s);
@@ -119,6 +127,7 @@ static int firewall_irq_counter_show(struct seq_file *s, void *v)
static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"bo_list", bo_list_show, 0},
{"fw_name", fw_name_show, 0},
+ {"fw_version", fw_version_show, 0},
{"fw_trace_capability", fw_trace_capability_show, 0},
{"fw_trace_config", fw_trace_config_show, 0},
{"last_bootmode", last_bootmode_show, 0},
@@ -127,32 +136,23 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"firewall_irq_counter", firewall_irq_counter_show, 0},
};
-static ssize_t
-dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+static int dvfs_mode_get(void *data, u64 *dvfs_mode)
{
- struct ivpu_device *vdev = file->private_data;
- struct ivpu_fw_info *fw = vdev->fw;
- u32 dvfs_mode;
- int ret;
-
- ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
- if (ret < 0)
- return ret;
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
- fw->dvfs_mode = dvfs_mode;
+ *dvfs_mode = vdev->fw->dvfs_mode;
+ return 0;
+}
- ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
- if (ret)
- return ret;
+static int dvfs_mode_set(void *data, u64 dvfs_mode)
+{
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
- return size;
+ vdev->fw->dvfs_mode = (u32)dvfs_mode;
+ return pci_try_reset_function(to_pci_dev(vdev->drm.dev));
}
-static const struct file_operations dvfs_mode_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = dvfs_mode_fops_write,
-};
+DEFINE_DEBUGFS_ATTRIBUTE(dvfs_mode_fops, dvfs_mode_get, dvfs_mode_set, "%llu\n");
static ssize_t
fw_dyndbg_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
@@ -201,7 +201,7 @@ fw_log_fops_write(struct file *file, const char __user *user_buf, size_t size, l
if (!size)
return -EINVAL;
- ivpu_fw_log_clear(vdev);
+ ivpu_fw_log_mark_read(vdev);
return size;
}
@@ -346,49 +346,23 @@ static const struct file_operations ivpu_force_recovery_fops = {
.write = ivpu_force_recovery_fn,
};
-static ssize_t
-ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+static int ivpu_reset_engine_fn(void *data, u64 val)
{
- struct ivpu_device *vdev = file->private_data;
-
- if (!size)
- return -EINVAL;
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
- return -ENODEV;
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
- return -ENODEV;
-
- return size;
+ return ivpu_jsm_reset_engine(vdev, (u32)val);
}
-static const struct file_operations ivpu_reset_engine_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = ivpu_reset_engine_fn,
-};
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_reset_engine_fops, NULL, ivpu_reset_engine_fn, "0x%02llx\n");
-static ssize_t
-ivpu_resume_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+static int ivpu_resume_engine_fn(void *data, u64 val)
{
- struct ivpu_device *vdev = file->private_data;
-
- if (!size)
- return -EINVAL;
+ struct ivpu_device *vdev = (struct ivpu_device *)data;
- if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
- return -ENODEV;
- if (ivpu_jsm_hws_resume_engine(vdev, DRM_IVPU_ENGINE_COPY))
- return -ENODEV;
-
- return size;
+ return ivpu_jsm_hws_resume_engine(vdev, (u32)val);
}
-static const struct file_operations ivpu_resume_engine_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .write = ivpu_resume_engine_fn,
-};
+DEFINE_DEBUGFS_ATTRIBUTE(ivpu_resume_engine_fops, NULL, ivpu_resume_engine_fn, "0x%02llx\n");
static int dct_active_get(void *data, u64 *active_percent)
{
@@ -432,7 +406,7 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
- debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+ debugfs_create_file("dvfs_mode", 0644, debugfs_root, vdev,
&dvfs_mode_fops);
debugfs_create_file("fw_dyndbg", 0200, debugfs_root, vdev,
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index c91400ecf926..ca2bf47ce248 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
+#include <generated/utsrelease.h>
#include <drm/drm_accel.h>
#include <drm/drm_file.h>
@@ -14,7 +15,7 @@
#include <drm/drm_ioctl.h>
#include <drm/drm_prime.h>
-#include "vpu_boot_api.h"
+#include "ivpu_coredump.h"
#include "ivpu_debugfs.h"
#include "ivpu_drv.h"
#include "ivpu_fw.h"
@@ -29,10 +30,10 @@
#include "ivpu_ms.h"
#include "ivpu_pm.h"
#include "ivpu_sysfs.h"
+#include "vpu_boot_api.h"
#ifndef DRIVER_VERSION_STR
-#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \
- __stringify(DRM_IVPU_DRIVER_MINOR) "."
+#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
#endif
static struct lock_class_key submitted_jobs_xa_lock_class_key;
@@ -42,8 +43,10 @@ module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
+#endif
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -53,9 +56,9 @@ u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
-int ivpu_sched_mode;
+int ivpu_sched_mode = IVPU_SCHED_MODE_AUTO;
module_param_named(sched_mode, ivpu_sched_mode, int, 0444);
-MODULE_PARM_DESC(sched_mode, "Scheduler mode: 0 - Default scheduler, 1 - Force HW scheduler");
+MODULE_PARM_DESC(sched_mode, "Scheduler mode: -1 - Use default scheduler, 0 - Use OS scheduler, 1 - Use HW scheduler");
bool ivpu_disable_mmu_cont_pages;
module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0444);
@@ -85,7 +88,7 @@ static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *fi
ivpu_cmdq_release_all_locked(file_priv);
ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
- ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+ ivpu_mmu_context_fini(vdev, &file_priv->ctx);
file_priv->bound = false;
drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
}
@@ -103,6 +106,8 @@ static void file_priv_release(struct kref *ref)
pm_runtime_get_sync(vdev->drm.dev);
mutex_lock(&vdev->context_list_lock);
file_priv_unbind(vdev, file_priv);
+ drm_WARN_ON(&vdev->drm, !xa_empty(&file_priv->cmdq_xa));
+ xa_destroy(&file_priv->cmdq_xa);
mutex_unlock(&vdev->context_list_lock);
pm_runtime_put_autosuspend(vdev->drm.dev);
@@ -116,8 +121,6 @@ void ivpu_file_priv_put(struct ivpu_file_priv **link)
struct ivpu_file_priv *file_priv = *link;
struct ivpu_device *vdev = file_priv->vdev;
- drm_WARN_ON(&vdev->drm, !file_priv);
-
ivpu_dbg(vdev, KREF, "file_priv put: ctx %u refcount %u\n",
file_priv->ctx.id, kref_read(&file_priv->ref));
@@ -255,9 +258,14 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
goto err_unlock;
}
- ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
- if (ret)
- goto err_xa_erase;
+ ivpu_mmu_context_init(vdev, &file_priv->ctx, ctx_id);
+
+ file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+ file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK;
+
+ xa_init_flags(&file_priv->cmdq_xa, XA_FLAGS_ALLOC1);
+ file_priv->cmdq_limit.min = IVPU_CMDQ_MIN_ID;
+ file_priv->cmdq_limit.max = IVPU_CMDQ_MAX_ID;
mutex_unlock(&vdev->context_list_lock);
drm_dev_exit(idx);
@@ -269,8 +277,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
return 0;
-err_xa_erase:
- xa_erase_irq(&vdev->context_xa, ctx_id);
err_unlock:
mutex_unlock(&vdev->context_list_lock);
mutex_destroy(&file_priv->ms_lock);
@@ -346,7 +352,7 @@ static int ivpu_hw_sched_init(struct ivpu_device *vdev)
{
int ret = 0;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
ret = ivpu_jsm_hws_setup_priority_bands(vdev);
if (ret) {
ivpu_err(vdev, "Failed to enable hw scheduler: %d", ret);
@@ -380,10 +386,7 @@ int ivpu_boot(struct ivpu_device *vdev)
ret = ivpu_wait_for_ready(vdev);
if (ret) {
ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_mmu_evtq_dump(vdev);
- ivpu_fw_log_dump(vdev);
- return ret;
+ goto err_diagnose_failure;
}
ivpu_hw_irq_clear(vdev);
@@ -394,12 +397,20 @@ int ivpu_boot(struct ivpu_device *vdev)
if (ivpu_fw_is_cold_boot(vdev)) {
ret = ivpu_pm_dct_init(vdev);
if (ret)
- return ret;
+ goto err_diagnose_failure;
- return ivpu_hw_sched_init(vdev);
+ ret = ivpu_hw_sched_init(vdev);
+ if (ret)
+ goto err_diagnose_failure;
}
return 0;
+
+err_diagnose_failure:
+ ivpu_hw_diagnose_failure(vdev);
+ ivpu_mmu_evtq_dump(vdev);
+ ivpu_dev_coredump(vdev);
+ return ret;
}
void ivpu_prepare_for_reset(struct ivpu_device *vdev)
@@ -446,9 +457,16 @@ static const struct drm_driver driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
+
+#ifdef DRIVER_DATE
.date = DRIVER_DATE,
- .major = DRM_IVPU_DRIVER_MAJOR,
- .minor = DRM_IVPU_DRIVER_MINOR,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+#else
+ .date = UTS_RELEASE,
+ .major = 1,
+#endif
};
static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
@@ -606,6 +624,9 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
+ vdev->db_limit.min = IVPU_MIN_DB;
+ vdev->db_limit.max = IVPU_MAX_DB;
+
ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
if (ret)
goto err_xa_destroy;
@@ -632,9 +653,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
if (ret)
goto err_shutdown;
- ret = ivpu_mmu_global_context_init(vdev);
- if (ret)
- goto err_shutdown;
+ ivpu_mmu_global_context_init(vdev);
ret = ivpu_mmu_init(vdev);
if (ret)
@@ -722,6 +741,7 @@ static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PTL_P) },
{ }
};
MODULE_DEVICE_TABLE(pci, ivpu_pci_ids);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 63f13b697eed..3fdff3f6cffd 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -21,11 +21,11 @@
#define DRIVER_NAME "intel_vpu"
#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
-#define DRIVER_DATE "20230117"
-#define PCI_DEVICE_ID_MTL 0x7d1d
-#define PCI_DEVICE_ID_ARL 0xad1d
-#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_MTL 0x7d1d
+#define PCI_DEVICE_ID_ARL 0xad1d
+#define PCI_DEVICE_ID_LNL 0x643e
+#define PCI_DEVICE_ID_PTL_P 0xb03e
#define IVPU_HW_IP_37XX 37
#define IVPU_HW_IP_40XX 40
@@ -46,17 +46,22 @@
#define IVPU_MIN_DB 1
#define IVPU_MAX_DB 255
-#define IVPU_NUM_ENGINES 2
+#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0)
+#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8)
+
#define IVPU_NUM_PRIORITIES 4
-#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_ENGINES * IVPU_NUM_PRIORITIES)
+#define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_PRIORITIES)
-#define IVPU_CMDQ_INDEX(engine, priority) ((engine) * IVPU_NUM_PRIORITIES + (priority))
+#define IVPU_CMDQ_MIN_ID 1
+#define IVPU_CMDQ_MAX_ID 255
#define IVPU_PLATFORM_SILICON 0
#define IVPU_PLATFORM_SIMICS 2
#define IVPU_PLATFORM_FPGA 3
#define IVPU_PLATFORM_INVALID 8
+#define IVPU_SCHED_MODE_AUTO -1
+
#define IVPU_DBG_REG BIT(0)
#define IVPU_DBG_IRQ BIT(1)
#define IVPU_DBG_MMU BIT(2)
@@ -134,6 +139,8 @@ struct ivpu_device {
struct xa_limit context_xa_limit;
struct xarray db_xa;
+ struct xa_limit db_limit;
+ u32 db_next;
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
@@ -152,6 +159,7 @@ struct ivpu_device {
int tdr;
int autosuspend;
int d0i3_entry_msg;
+ int state_dump_msg;
} timeout;
};
@@ -163,11 +171,15 @@ struct ivpu_file_priv {
struct kref ref;
struct ivpu_device *vdev;
struct mutex lock; /* Protects cmdq */
- struct ivpu_cmdq *cmdq[IVPU_NUM_CMDQS_PER_CTX];
+ struct xarray cmdq_xa;
struct ivpu_mmu_context ctx;
struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */
struct list_head ms_instance_list;
struct ivpu_bo *ms_info_bo;
+ struct xa_limit job_limit;
+ u32 job_id_next;
+ struct xa_limit cmdq_limit;
+ u32 cmdq_id_next;
bool has_mmu_faults;
bool bound;
bool aborted;
@@ -185,9 +197,9 @@ extern bool ivpu_force_snoop;
#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4)
#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
-#define IVPU_TEST_MODE_PREEMPTION_DISABLE BIT(6)
-#define IVPU_TEST_MODE_HWS_EXTRA_EVENTS BIT(7)
+#define IVPU_TEST_MODE_MIP_DISABLE BIT(6)
#define IVPU_TEST_MODE_DISABLE_TIMEOUTS BIT(8)
+#define IVPU_TEST_MODE_TURBO BIT(9)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
@@ -215,6 +227,8 @@ static inline int ivpu_hw_ip_gen(struct ivpu_device *vdev)
return IVPU_HW_IP_37XX;
case PCI_DEVICE_ID_LNL:
return IVPU_HW_IP_40XX;
+ case PCI_DEVICE_ID_PTL_P:
+ return IVPU_HW_IP_50XX;
default:
dump_stack();
ivpu_err(vdev, "Unknown NPU IP generation\n");
@@ -229,6 +243,7 @@ static inline int ivpu_hw_btrs_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_ARL:
return IVPU_HW_BTRS_MTL;
case PCI_DEVICE_ID_LNL:
+ case PCI_DEVICE_ID_PTL_P:
return IVPU_HW_BTRS_LNL;
default:
dump_stack();
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index ede6165e09d9..6037ec0b3096 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -25,7 +25,6 @@
#define FW_SHAVE_NN_MAX_SIZE SZ_2M
#define FW_RUNTIME_MIN_ADDR (FW_GLOBAL_MEM_START)
#define FW_RUNTIME_MAX_ADDR (FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
-#define FW_VERSION_HEADER_SIZE SZ_4K
#define FW_FILE_IMAGE_OFFSET (VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
#define WATCHDOG_MSS_REDIRECT 32
@@ -47,8 +46,10 @@
#define IVPU_FOCUS_PRESENT_TIMER_MS 1000
static char *ivpu_firmware;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/..");
+#endif
static struct {
int gen;
@@ -58,11 +59,14 @@ static struct {
{ IVPU_HW_IP_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
{ IVPU_HW_IP_40XX, "vpu_40xx.bin" },
{ IVPU_HW_IP_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
+ { IVPU_HW_IP_50XX, "vpu_50xx.bin" },
+ { IVPU_HW_IP_50XX, "intel/vpu/vpu_50xx_v0.0.bin" },
};
/* Production fw_names from the table above */
MODULE_FIRMWARE("intel/vpu/vpu_37xx_v0.0.bin");
MODULE_FIRMWARE("intel/vpu/vpu_40xx_v0.0.bin");
+MODULE_FIRMWARE("intel/vpu/vpu_50xx_v0.0.bin");
static int ivpu_fw_request(struct ivpu_device *vdev)
{
@@ -135,6 +139,15 @@ static bool is_within_range(u64 addr, size_t size, u64 range_start, size_t range
return true;
}
+static u32
+ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+ if (ivpu_sched_mode != IVPU_SCHED_MODE_AUTO)
+ return ivpu_sched_mode;
+
+ return VPU_SCHEDULING_MODE_OS;
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -191,8 +204,10 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
ivpu_dbg(vdev, FW_BOOT, "Header version: 0x%x, format 0x%x\n",
fw_hdr->header_version, fw_hdr->image_format);
- ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
- (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
+ if (!scnprintf(fw->version, sizeof(fw->version), "%s", fw->file->data + VPU_FW_HEADER_SIZE))
+ ivpu_warn(vdev, "Missing firmware version\n");
+
+ ivpu_info(vdev, "Firmware: %s, version: %s\n", fw->name, fw->version);
if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
return -EINVAL;
@@ -208,14 +223,16 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->cold_boot_entry_point = fw_hdr->entry_point;
fw->entry_point = fw->cold_boot_entry_point;
- fw->trace_level = min_t(u32, ivpu_log_level, IVPU_FW_LOG_FATAL);
+ fw->trace_level = min_t(u32, ivpu_fw_log_level, IVPU_FW_LOG_FATAL);
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
fw->dvfs_mode = 0;
+ fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+ ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
fw_hdr->ro_section_size,
@@ -311,7 +328,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
goto err_free_fw_mem;
}
- if (ivpu_log_level <= IVPU_FW_LOG_INFO)
+ if (ivpu_fw_log_level <= IVPU_FW_LOG_INFO)
log_verb_size = IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE;
else
log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
@@ -567,8 +584,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
- boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
- boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+ if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
+ boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
+ boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
+ }
/* Allow configuration for L2C_PAGE_TABLE with boot param value */
boot_params->autoconfig = 1;
@@ -604,8 +623,8 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_base = ivpu_hw_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_telemetry_enable_get(vdev);
- boot_params->vpu_scheduling_mode = vdev->hw->sched_mode;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ boot_params->vpu_scheduling_mode = vdev->fw->sched_mode;
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
boot_params->vpu_focus_present_timer_ms = IVPU_FOCUS_PRESENT_TIMER_MS;
boot_params->dvfs_mode = vdev->fw->dvfs_mode;
if (!IVPU_WA(disable_d0i3_msg))
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 40d9d17be3f5..1d0b2bd9d65c 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -1,11 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_FW_H__
#define __IVPU_FW_H__
+#include "vpu_jsm_api.h"
+
+#define FW_VERSION_HEADER_SIZE SZ_4K
+#define FW_VERSION_STR_SIZE SZ_256
+
struct ivpu_device;
struct ivpu_bo;
struct vpu_boot_params;
@@ -13,6 +18,7 @@ struct vpu_boot_params;
struct ivpu_fw_info {
const struct firmware *file;
const char *name;
+ char version[FW_VERSION_STR_SIZE];
struct ivpu_bo *mem;
struct ivpu_bo *mem_shave_nn;
struct ivpu_bo *mem_log_crit;
@@ -32,6 +38,7 @@ struct ivpu_fw_info {
u32 secondary_preempt_buf_size;
u64 read_only_addr;
u32 read_only_size;
+ u32 sched_mode;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index ef0adb5e0fbe..337c906b0210 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/ctype.h>
@@ -15,19 +15,19 @@
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
-#define IVPU_FW_LOG_LINE_LENGTH 256
+#define IVPU_FW_LOG_LINE_LENGTH 256
-unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
-module_param(ivpu_log_level, uint, 0444);
-MODULE_PARM_DESC(ivpu_log_level,
- "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
+unsigned int ivpu_fw_log_level = IVPU_FW_LOG_ERROR;
+module_param_named(fw_log_level, ivpu_fw_log_level, uint, 0444);
+MODULE_PARM_DESC(fw_log_level,
+ "NPU firmware default log level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
" info=" __stringify(IVPU_FW_LOG_INFO)
" warn=" __stringify(IVPU_FW_LOG_WARN)
" error=" __stringify(IVPU_FW_LOG_ERROR)
" fatal=" __stringify(IVPU_FW_LOG_FATAL));
-static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
- struct vpu_tracing_buffer_header **log_header)
+static int fw_log_from_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
+ struct vpu_tracing_buffer_header **out_log)
{
struct vpu_tracing_buffer_header *log;
@@ -48,7 +48,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return -EINVAL;
}
- *log_header = log;
+ *out_log = log;
*offset += log->size;
ivpu_dbg(vdev, FW_BOOT,
@@ -59,7 +59,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
return 0;
}
-static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
+static void fw_log_print_lines(char *buffer, u32 size, struct drm_printer *p)
{
char line[IVPU_FW_LOG_LINE_LENGTH];
u32 index = 0;
@@ -87,56 +87,89 @@ static void buffer_print(char *buffer, u32 size, struct drm_printer *p)
}
line[index] = 0;
if (index != 0)
- drm_printf(p, "%s\n", line);
+ drm_printf(p, "%s", line);
}
-static void fw_log_print_buffer(struct ivpu_device *vdev, struct vpu_tracing_buffer_header *log,
- const char *prefix, bool only_new_msgs, struct drm_printer *p)
+static void fw_log_print_buffer(struct vpu_tracing_buffer_header *log, const char *prefix,
+ bool only_new_msgs, struct drm_printer *p)
{
- char *log_buffer = (void *)log + log->header_size;
- u32 log_size = log->size - log->header_size;
- u32 log_start = log->read_index;
- u32 log_end = log->write_index;
-
- if (!(log->write_index || log->wrap_count) ||
- (log->write_index == log->read_index && only_new_msgs)) {
- drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
- return;
+ char *log_data = (void *)log + log->header_size;
+ u32 data_size = log->size - log->header_size;
+ u32 log_start = only_new_msgs ? READ_ONCE(log->read_index) : 0;
+ u32 log_end = READ_ONCE(log->write_index);
+
+ if (log->wrap_count == log->read_wrap_count) {
+ if (log_end <= log_start) {
+ drm_printf(p, "==== %s \"%s\" log empty ====\n", prefix, log->name);
+ return;
+ }
+ } else if (log->wrap_count == log->read_wrap_count + 1) {
+ if (log_end > log_start)
+ log_start = log_end;
+ } else {
+ log_start = log_end;
}
drm_printf(p, "==== %s \"%s\" log start ====\n", prefix, log->name);
- if (log->write_index > log->read_index) {
- buffer_print(log_buffer + log_start, log_end - log_start, p);
+ if (log_end > log_start) {
+ fw_log_print_lines(log_data + log_start, log_end - log_start, p);
} else {
- buffer_print(log_buffer + log_end, log_size - log_end, p);
- buffer_print(log_buffer, log_end, p);
+ fw_log_print_lines(log_data + log_start, data_size - log_start, p);
+ fw_log_print_lines(log_data, log_end, p);
}
- drm_printf(p, "\x1b[0m");
+ drm_printf(p, "\n\x1b[0m"); /* add new line and clear formatting */
drm_printf(p, "==== %s \"%s\" log end ====\n", prefix, log->name);
}
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+static void
+fw_log_print_all_in_bo(struct ivpu_device *vdev, const char *name,
+ struct ivpu_bo *bo, bool only_new_msgs, struct drm_printer *p)
{
- struct vpu_tracing_buffer_header *log_header;
+ struct vpu_tracing_buffer_header *log;
u32 next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, bo, &next, &log) == 0)
+ fw_log_print_buffer(log, name, only_new_msgs, p);
+}
+
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p)
+{
+ fw_log_print_all_in_bo(vdev, "NPU critical", vdev->fw->mem_log_crit, only_new_msgs, p);
+ fw_log_print_all_in_bo(vdev, "NPU verbose", vdev->fw->mem_log_verb, only_new_msgs, p);
+}
+
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev)
+{
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
+
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p);
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = READ_ONCE(log->write_index);
+ log->read_wrap_count = READ_ONCE(log->wrap_count);
+ }
}
-void ivpu_fw_log_clear(struct ivpu_device *vdev)
+void ivpu_fw_log_reset(struct ivpu_device *vdev)
{
- struct vpu_tracing_buffer_header *log_header;
- u32 next = 0;
+ struct vpu_tracing_buffer_header *log;
+ u32 next;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ next = 0;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_crit, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
next = 0;
- while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- log_header->read_index = log_header->write_index;
+ while (fw_log_from_bo(vdev, vdev->fw->mem_log_verb, &next, &log) == 0) {
+ log->read_index = 0;
+ log->read_wrap_count = 0;
+ }
}
diff --git a/drivers/accel/ivpu/ivpu_fw_log.h b/drivers/accel/ivpu/ivpu_fw_log.h
index 0b2573f6f315..8bb528a73cb7 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.h
+++ b/drivers/accel/ivpu/ivpu_fw_log.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_FW_LOG_H__
@@ -8,8 +8,6 @@
#include <linux/types.h>
-#include <drm/drm_print.h>
-
#include "ivpu_drv.h"
#define IVPU_FW_LOG_DEFAULT 0
@@ -19,20 +17,15 @@
#define IVPU_FW_LOG_ERROR 4
#define IVPU_FW_LOG_FATAL 5
-extern unsigned int ivpu_log_level;
-
#define IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE SZ_1M
#define IVPU_FW_VERBOSE_BUFFER_LARGE_SIZE SZ_8M
#define IVPU_FW_CRITICAL_BUFFER_SIZE SZ_512K
-void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
-void ivpu_fw_log_clear(struct ivpu_device *vdev);
+extern unsigned int ivpu_fw_log_level;
-static inline void ivpu_fw_log_dump(struct ivpu_device *vdev)
-{
- struct drm_printer p = drm_info_printer(vdev->drm.dev);
+void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_printer *p);
+void ivpu_fw_log_mark_read(struct ivpu_device *vdev);
+void ivpu_fw_log_reset(struct ivpu_device *vdev);
- ivpu_fw_log_print(vdev, false, &p);
-}
#endif /* __IVPU_FW_LOG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 1b409dbd332d..16178054e629 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -384,6 +384,9 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+ /* Add 1 jiffy to ensure the wait function never times out before intended timeout_ns */
+ timeout += 1;
+
obj = drm_gem_object_lookup(file, args->handle);
if (!obj)
return -EINVAL;
@@ -406,7 +409,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
mutex_lock(&bo->lock);
drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
- bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
+ bo, bo->ctx ? bo->ctx->id : 0, bo->vpu_addr, bo->base.base.size,
bo->flags, kref_read(&bo->base.base.refcount));
if (bo->base.pages)
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index e69c0613513f..4e1054f3466e 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
+ vdev->timeout.state_dump_msg = 10;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
- vdev->timeout.autosuspend = -1;
+ vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 100;
+ vdev->timeout.state_dump_msg = 10;
} else {
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
@@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev)
else
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 5;
+ vdev->timeout.state_dump_msg = 10;
}
}
@@ -111,14 +114,14 @@ static void memory_ranges_init(struct ivpu_device *vdev)
{
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_range_init(&vdev->hw->ranges.user, 0xc0000000, 255 * SZ_1M);
+ ivpu_hw_range_init(&vdev->hw->ranges.user, 0x88000000, 511 * SZ_1M);
ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x180000000, SZ_2G);
- ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_8G);
+ ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_128G);
} else {
ivpu_hw_range_init(&vdev->hw->ranges.global, 0x80000000, SZ_512M);
- ivpu_hw_range_init(&vdev->hw->ranges.user, 0x80000000, SZ_256M);
- ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M);
- ivpu_hw_range_init(&vdev->hw->ranges.dma, 0x200000000, SZ_8G);
+ ivpu_hw_range_init(&vdev->hw->ranges.shave, 0x80000000, SZ_2G);
+ ivpu_hw_range_init(&vdev->hw->ranges.user, 0x100000000, SZ_256G);
+ vdev->hw->ranges.dma = vdev->hw->ranges.user;
}
}
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index a96a05b2acda..fc4dbfc980c8 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -46,7 +46,6 @@ struct ivpu_hw_info {
u32 profiling_freq;
} pll;
u32 tile_fuse;
- u32 sched_mode;
u32 sku;
u16 config;
int dma_bits;
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
index d0b795b344c7..fc0ee8d637f9 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_40xx_reg.h
@@ -115,6 +115,8 @@
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY 0x00030068u
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST_DLY_MASK GENMASK(7, 0)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST1_DLY_MASK GENMASK(15, 8)
+#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY_POST2_DLY_MASK GENMASK(23, 16)
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY 0x0003006cu
#define VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY_STATUS_DLY_MASK GENMASK(7, 0)
diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c
index 745e5248803d..3212c99f3682 100644
--- a/drivers/accel/ivpu/ivpu_hw_btrs.c
+++ b/drivers/accel/ivpu/ivpu_hw_btrs.c
@@ -141,16 +141,10 @@ static int read_tile_config_fuse(struct ivpu_device *vdev, u32 *tile_fuse_config
}
config = REG_GET_FLD(VPU_HW_BTRS_LNL_TILE_FUSE, CONFIG, fuse);
- if (!tile_disable_check(config)) {
- ivpu_err(vdev, "Fuse: Invalid tile disable config (0x%x)\n", config);
- return -EIO;
- }
+ if (!tile_disable_check(config))
+ ivpu_warn(vdev, "More than 1 tile disabled, tile fuse config mask: 0x%x\n", config);
- if (config)
- ivpu_dbg(vdev, MISC, "Fuse: %d tiles enabled. Tile number %d disabled\n",
- BTRS_LNL_TILE_MAX_NUM - 1, ffs(config) - 1);
- else
- ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", BTRS_LNL_TILE_MAX_NUM);
+ ivpu_dbg(vdev, MISC, "Tile disable config mask: 0x%x\n", config);
*tile_fuse_config = config;
return 0;
@@ -163,7 +157,6 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
- hw->sched_mode = ivpu_sched_mode;
return 0;
}
@@ -178,7 +171,6 @@ static int info_init_lnl(struct ivpu_device *vdev)
if (ret)
return ret;
- hw->sched_mode = ivpu_sched_mode;
hw->tile_fuse = tile_fuse_config;
hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
@@ -315,10 +307,6 @@ static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp,
wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0;
wp->epp = enable ? PLL_EPP_DEFAULT : 0;
}
-
- /* Simics cannot start without at least one tile */
- if (enable && ivpu_is_simics(vdev))
- wp->cfg = 1;
}
static int wait_for_pll_lock(struct ivpu_device *vdev, bool enable)
@@ -465,9 +453,6 @@ int ivpu_hw_btrs_wait_for_clock_res_own_ack(struct ivpu_device *vdev)
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
return 0;
- if (ivpu_is_simics(vdev))
- return 0;
-
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
}
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 60b33fc59d96..029dd065614b 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -8,15 +8,12 @@
#include "ivpu_hw.h"
#include "ivpu_hw_37xx_reg.h"
#include "ivpu_hw_40xx_reg.h"
+#include "ivpu_hw_btrs.h"
#include "ivpu_hw_ip.h"
#include "ivpu_hw_reg_io.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
-#define PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT 0
-#define PWR_ISLAND_EN_POST_DLY_FREQ_HIGH 18
-#define PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT 3
-#define PWR_ISLAND_STATUS_DLY_FREQ_HIGH 46
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIM_SAFE_ENABLE 0xf1d0dead
@@ -268,20 +265,15 @@ void ivpu_hw_ip_idle_gen_disable(struct ivpu_device *vdev)
idle_gen_drive_40xx(vdev, false);
}
-static void pwr_island_delay_set_50xx(struct ivpu_device *vdev)
+static void
+pwr_island_delay_set_50xx(struct ivpu_device *vdev, u32 post, u32 post1, u32 post2, u32 status)
{
- u32 val, post, status;
-
- if (vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_DEFAULT) {
- post = PWR_ISLAND_EN_POST_DLY_FREQ_DEFAULT;
- status = PWR_ISLAND_STATUS_DLY_FREQ_DEFAULT;
- } else {
- post = PWR_ISLAND_EN_POST_DLY_FREQ_HIGH;
- status = PWR_ISLAND_STATUS_DLY_FREQ_HIGH;
- }
+ u32 val;
val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY);
val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST_DLY, post, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST1_DLY, post1, val);
+ val = REG_SET_FLD_NUM(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, POST2_DLY, post2, val);
REGV_WR32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_EN_POST_DLY, val);
val = REGV_RD32(VPU_50XX_HOST_SS_AON_PWR_ISLAND_STATUS_DLY);
@@ -311,9 +303,6 @@ static void pwr_island_trickle_drive_40xx(struct ivpu_device *vdev, bool enable)
val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, CSS_CPU, val);
REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0, val);
-
- if (enable)
- ndelay(500);
}
static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable)
@@ -326,9 +315,6 @@ static void pwr_island_drive_37xx(struct ivpu_device *vdev, bool enable)
val = REG_CLR_FLD(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, CSS_CPU, val);
REGV_WR32(VPU_40XX_HOST_SS_AON_PWR_ISLAND_EN0, val);
-
- if (!enable)
- ndelay(500);
}
static void pwr_island_drive_40xx(struct ivpu_device *vdev, bool enable)
@@ -347,9 +333,11 @@ static void pwr_island_enable(struct ivpu_device *vdev)
{
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX) {
pwr_island_trickle_drive_37xx(vdev, true);
+ ndelay(500);
pwr_island_drive_37xx(vdev, true);
} else {
pwr_island_trickle_drive_40xx(vdev, true);
+ ndelay(500);
pwr_island_drive_40xx(vdev, true);
}
}
@@ -686,13 +674,36 @@ static void dpu_active_drive_37xx(struct ivpu_device *vdev, bool enable)
REGV_WR32(VPU_37XX_HOST_SS_AON_DPU_ACTIVE, val);
}
+static void pwr_island_delay_set(struct ivpu_device *vdev)
+{
+ bool high = vdev->hw->pll.profiling_freq == PLL_PROFILING_FREQ_HIGH;
+ u32 post, post1, post2, status;
+
+ if (ivpu_hw_ip_gen(vdev) < IVPU_HW_IP_50XX)
+ return;
+
+ switch (ivpu_device_id(vdev)) {
+ case PCI_DEVICE_ID_PTL_P:
+ post = high ? 18 : 0;
+ post1 = 0;
+ post2 = 0;
+ status = high ? 46 : 3;
+ break;
+
+ default:
+ dump_stack();
+ ivpu_err(vdev, "Unknown device ID\n");
+ return;
+ }
+
+ pwr_island_delay_set_50xx(vdev, post, post1, post2, status);
+}
+
int ivpu_hw_ip_pwr_domain_enable(struct ivpu_device *vdev)
{
int ret;
- if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_50XX)
- pwr_island_delay_set_50xx(vdev);
-
+ pwr_island_delay_set(vdev);
pwr_island_enable(vdev);
ret = wait_for_pwr_island_status(vdev, 0x1);
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 78b32a823241..01ebf88fe6ef 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -15,6 +15,7 @@
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
#define IPC_MAX_RX_MSG 128
@@ -227,6 +228,7 @@ int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, stru
goto unlock;
ivpu_ipc_tx(vdev, cons->tx_vpu_addr);
+ trace_jsm("[tx]", req);
unlock:
mutex_unlock(&ipc->lock);
@@ -278,12 +280,13 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
- ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
+ ivpu_err(vdev, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
if (jsm_msg)
memcpy(jsm_msg, rx_msg->jsm_msg, size);
+ trace_jsm("[rx]", rx_msg->jsm_msg);
}
ivpu_ipc_rx_msg_del(vdev, rx_msg);
@@ -291,15 +294,16 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
return ret;
}
-static int
+int
ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms)
+ struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms)
{
struct ivpu_ipc_consumer cons;
int ret;
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
ret = ivpu_ipc_send(vdev, &cons, req);
@@ -325,19 +329,21 @@ consumer_del:
return ret;
}
-int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms)
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
- drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
if (ret != -ETIMEDOUT)
- return ret;
+ goto rpm_put;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -345,21 +351,33 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
if (hb_ret == -ETIMEDOUT)
ivpu_pm_trigger_recovery(vdev, "IPC timeout");
+rpm_put:
+ ivpu_rpm_put(vdev);
return ret;
}
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms)
+int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ u32 channel, unsigned long timeout_ms)
{
+ struct ivpu_ipc_consumer cons;
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return ret;
- ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
+ ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
+ ret = ivpu_ipc_send(vdev, &cons, req);
+ if (ret) {
+ ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret);
+ goto consumer_del;
+ }
+
+ msleep(timeout_ms);
+
+consumer_del:
+ ivpu_ipc_consumer_del(vdev, &cons);
ivpu_rpm_put(vdev);
return ret;
}
@@ -518,7 +536,6 @@ void ivpu_ipc_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
- drm_WARN_ON(&vdev->drm, ipc->on);
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 4fe38141045e..b4dfb504679b 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -101,12 +101,13 @@ int ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
unsigned long timeout_ms);
-
-int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
- u32 channel, unsigned long timeout_ms);
+int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp_type,
+ struct vpu_jsm_msg *resp, u32 channel, unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
u32 channel, unsigned long timeout_ms);
+int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ u32 channel, unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index be2e2bf0f43f..7149312f16e1 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -18,11 +18,10 @@
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
#include "vpu_boot_api.h"
#define CMD_BUF_IDX 0
-#define JOB_ID_JOB_MASK GENMASK(7, 0)
-#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
@@ -35,24 +34,20 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
{
u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE);
u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE);
- struct ivpu_addr_range range;
- if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW ||
+ ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
return 0;
- range.start = vdev->hw->ranges.user.end - (primary_size * IVPU_NUM_CMDQS_PER_CTX);
- range.end = vdev->hw->ranges.user.end;
- cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, primary_size,
- DRM_IVPU_BO_WC);
+ cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
+ primary_size, DRM_IVPU_BO_WC);
if (!cmdq->primary_preempt_buf) {
ivpu_err(vdev, "Failed to create primary preemption buffer\n");
return -ENOMEM;
}
- range.start = vdev->hw->ranges.shave.end - (secondary_size * IVPU_NUM_CMDQS_PER_CTX);
- range.end = vdev->hw->ranges.shave.end;
- cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &range, secondary_size,
- DRM_IVPU_BO_WC);
+ cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
+ secondary_size, DRM_IVPU_BO_WC);
if (!cmdq->secondary_preempt_buf) {
ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
goto err_free_primary;
@@ -62,24 +57,24 @@ static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
err_free_primary:
ivpu_bo_free(cmdq->primary_preempt_buf);
+ cmdq->primary_preempt_buf = NULL;
return -ENOMEM;
}
static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
{
- if (vdev->hw->sched_mode != VPU_SCHEDULING_MODE_HW)
+ if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
return;
- drm_WARN_ON(&vdev->drm, !cmdq->primary_preempt_buf);
- drm_WARN_ON(&vdev->drm, !cmdq->secondary_preempt_buf);
- ivpu_bo_free(cmdq->primary_preempt_buf);
- ivpu_bo_free(cmdq->secondary_preempt_buf);
+ if (cmdq->primary_preempt_buf)
+ ivpu_bo_free(cmdq->primary_preempt_buf);
+ if (cmdq->secondary_preempt_buf)
+ ivpu_bo_free(cmdq->secondary_preempt_buf);
}
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
{
- struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB};
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_cmdq *cmdq;
int ret;
@@ -88,25 +83,33 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
if (!cmdq)
return NULL;
- ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL);
- if (ret) {
+ ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next,
+ GFP_KERNEL);
+ if (ret < 0) {
ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
goto err_free_cmdq;
}
+ ret = xa_alloc_cyclic(&file_priv->cmdq_xa, &cmdq->id, cmdq, file_priv->cmdq_limit,
+ &file_priv->cmdq_id_next, GFP_KERNEL);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to allocate command queue id: %d\n", ret);
+ goto err_erase_db_xa;
+ }
+
cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
- goto err_erase_xa;
+ goto err_erase_cmdq_xa;
ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
if (ret)
- goto err_free_cmdq_mem;
+ ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n");
return cmdq;
-err_free_cmdq_mem:
- ivpu_bo_free(cmdq->mem);
-err_erase_xa:
+err_erase_cmdq_xa:
+ xa_erase(&file_priv->cmdq_xa, cmdq->id);
+err_erase_db_xa:
xa_erase(&vdev->db_xa, cmdq->db_id);
err_free_cmdq:
kfree(cmdq);
@@ -130,13 +133,13 @@ static int ivpu_hws_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq
struct ivpu_device *vdev = file_priv->vdev;
int ret;
- ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->db_id,
+ ret = ivpu_jsm_hws_create_cmdq(vdev, file_priv->ctx.id, file_priv->ctx.id, cmdq->id,
task_pid_nr(current), engine,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (ret)
return ret;
- ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->db_id,
+ ret = ivpu_jsm_hws_set_context_sched_properties(vdev, file_priv->ctx.id, cmdq->id,
priority);
if (ret)
return ret;
@@ -149,21 +152,22 @@ static int ivpu_register_db(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *
struct ivpu_device *vdev = file_priv->vdev;
int ret;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW)
- ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->db_id, cmdq->db_id,
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW)
+ ret = ivpu_jsm_hws_register_db(vdev, file_priv->ctx.id, cmdq->id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
else
ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
if (!ret)
- ivpu_dbg(vdev, JOB, "DB %d registered to ctx %d\n", cmdq->db_id, file_priv->ctx.id);
+ ivpu_dbg(vdev, JOB, "DB %d registered to cmdq %d ctx %d\n",
+ cmdq->db_id, cmdq->id, file_priv->ctx.id);
return ret;
}
static int
-ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 engine, u8 priority)
+ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u8 priority)
{
struct ivpu_device *vdev = file_priv->vdev;
struct vpu_job_queue_header *jobq_header;
@@ -179,13 +183,18 @@ ivpu_cmdq_init(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq, u16 eng
cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
jobq_header = &cmdq->jobq->header;
- jobq_header->engine_idx = engine;
+ jobq_header->engine_idx = VPU_ENGINE_COMPUTE;
jobq_header->head = 0;
jobq_header->tail = 0;
+ if (ivpu_test_mode & IVPU_TEST_MODE_TURBO) {
+ ivpu_dbg(vdev, JOB, "Turbo mode enabled");
+ jobq_header->flags = VPU_JOB_QUEUE_FLAGS_TURBO_MODE;
+ }
+
wmb(); /* Flush WC buffer for jobq->header */
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- ret = ivpu_hws_cmdq_init(file_priv, cmdq, engine, priority);
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_hws_cmdq_init(file_priv, cmdq, VPU_ENGINE_COMPUTE, priority);
if (ret)
return ret;
}
@@ -211,10 +220,10 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm
cmdq->db_registered = false;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW) {
- ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->db_id);
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
if (!ret)
- ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->db_id);
+ ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id);
}
ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
@@ -224,55 +233,46 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm
return 0;
}
-static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine,
- u8 priority)
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u8 priority)
{
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
int ret;
lockdep_assert_held(&file_priv->lock);
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ if (cmdq->priority == priority)
+ break;
+
if (!cmdq) {
cmdq = ivpu_cmdq_alloc(file_priv);
if (!cmdq)
return NULL;
- file_priv->cmdq[cmdq_idx] = cmdq;
+ cmdq->priority = priority;
}
- ret = ivpu_cmdq_init(file_priv, cmdq, engine, priority);
+ ret = ivpu_cmdq_init(file_priv, cmdq, priority);
if (ret)
return NULL;
return cmdq;
}
-static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine, u8 priority)
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
{
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
lockdep_assert_held(&file_priv->lock);
- if (cmdq) {
- file_priv->cmdq[cmdq_idx] = NULL;
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq) {
+ xa_erase(&file_priv->cmdq_xa, cmdq_id);
ivpu_cmdq_fini(file_priv, cmdq);
ivpu_cmdq_free(file_priv, cmdq);
}
}
-void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
-{
- u16 engine;
- u8 priority;
-
- lockdep_assert_held(&file_priv->lock);
-
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++)
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++)
- ivpu_cmdq_release_locked(file_priv, engine, priority);
-}
-
/*
* Mark the doorbell as unregistered
* This function needs to be called when the VPU hardware is restarted
@@ -281,20 +281,13 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
*/
static void ivpu_cmdq_reset(struct ivpu_file_priv *file_priv)
{
- u16 engine;
- u8 priority;
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
mutex_lock(&file_priv->lock);
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) {
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) {
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
- struct ivpu_cmdq *cmdq = file_priv->cmdq[cmdq_idx];
-
- if (cmdq)
- cmdq->db_registered = false;
- }
- }
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ cmdq->db_registered = false;
mutex_unlock(&file_priv->lock);
}
@@ -314,17 +307,11 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
static void ivpu_cmdq_fini_all(struct ivpu_file_priv *file_priv)
{
- u16 engine;
- u8 priority;
-
- for (engine = 0; engine < IVPU_NUM_ENGINES; engine++) {
- for (priority = 0; priority < IVPU_NUM_PRIORITIES; priority++) {
- int cmdq_idx = IVPU_CMDQ_INDEX(engine, priority);
+ struct ivpu_cmdq *cmdq;
+ unsigned long cmdq_id;
- if (file_priv->cmdq[cmdq_idx])
- ivpu_cmdq_fini(file_priv, file_priv->cmdq[cmdq_idx]);
- }
- }
+ xa_for_each(&file_priv->cmdq_xa, cmdq_id, cmdq)
+ ivpu_cmdq_fini(file_priv, cmdq);
}
void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
@@ -335,7 +322,7 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
ivpu_cmdq_fini_all(file_priv);
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_OS)
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
}
@@ -349,24 +336,29 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
/* Check if there is space left in job queue */
if (next_entry == header->head) {
- ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
- job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
+ ivpu_dbg(vdev, JOB, "Job queue full: ctx %d cmdq %d db %d head %d tail %d\n",
+ job->file_priv->ctx.id, cmdq->id, cmdq->db_id, header->head, tail);
return -EBUSY;
}
- entry = &cmdq->jobq->job[tail];
+ entry = &cmdq->jobq->slot[tail].job;
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
- if (vdev->hw->sched_mode == VPU_SCHEDULING_MODE_HW &&
- (unlikely(!(ivpu_test_mode & IVPU_TEST_MODE_PREEMPTION_DISABLE)))) {
- entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
- entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
- entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
- entry->secondary_preempt_buf_size = ivpu_bo_size(cmdq->secondary_preempt_buf);
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
+ if (cmdq->primary_preempt_buf) {
+ entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
+ entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
+ }
+
+ if (cmdq->secondary_preempt_buf) {
+ entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
+ entry->secondary_preempt_buf_size =
+ ivpu_bo_size(cmdq->secondary_preempt_buf);
+ }
}
wmb(); /* Ensure that tail is updated after filling entry */
@@ -457,6 +449,7 @@ ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
job->file_priv = ivpu_file_priv_get(file_priv);
+ trace_job("create", job);
ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
return job;
@@ -496,6 +489,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
job->bos[CMD_BUF_IDX]->job_status = job_status;
dma_fence_signal(job->done_fence);
+ trace_job("done", job);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
@@ -519,7 +513,6 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
{
struct ivpu_file_priv *file_priv = job->file_priv;
struct ivpu_device *vdev = job->vdev;
- struct xa_limit job_id_range;
struct ivpu_cmdq *cmdq;
bool is_first_job;
int ret;
@@ -530,7 +523,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx, priority);
+ cmdq = ivpu_cmdq_acquire(file_priv, priority);
if (!cmdq) {
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
file_priv->ctx.id, job->engine_idx, priority);
@@ -538,13 +531,11 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
goto err_unlock_file_priv;
}
- job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
- job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
-
xa_lock(&vdev->submitted_jobs_xa);
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
- ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
- if (ret) {
+ ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
+ &file_priv->job_id_next, GFP_KERNEL);
+ if (ret < 0) {
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
file_priv->ctx.id);
ret = -EBUSY;
@@ -566,6 +557,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
vdev->busy_start_ts = ktime_get();
}
+ trace_job("submit", job);
ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n",
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
@@ -673,7 +665,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
int idx, ret;
u8 priority;
- if (params->engine > DRM_IVPU_ENGINE_COPY)
+ if (params->engine != DRM_IVPU_ENGINE_COMPUTE)
return -EINVAL;
if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
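
The hunks above replace the fixed per-engine/priority command-queue array with an xarray and switch job IDs to per-context cyclic allocation. A minimal sketch of that allocation pattern, using a hypothetical xarray and ID range (my_xa, my_id_next and the 1..1023 limit are illustrative, not the driver's values):

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(my_xa);
static u32 my_id_next;	/* cyclic allocation hint, analogous to file_priv->job_id_next */

static int my_track(void *entry, u32 *out_id)
{
	struct xa_limit limit = XA_LIMIT(1, 1023);	/* hypothetical id range */
	int ret;

	/* xa_alloc_cyclic() returns 0 on success, 1 if the search wrapped,
	 * and a negative errno on failure - hence the "ret < 0" test, as in
	 * the ivpu_job_submit() hunk above.
	 */
	ret = xa_alloc_cyclic(&my_xa, out_id, entry, limit, &my_id_next, GFP_KERNEL);
	return ret < 0 ? ret : 0;
}
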
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 6accb94028c7..8b19e3f8b4cf 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -28,8 +28,10 @@ struct ivpu_cmdq {
struct ivpu_bo *secondary_preempt_buf;
struct ivpu_bo *mem;
u32 entry_count;
+ u32 id;
u32 db_id;
bool db_registered;
+ u8 priority;
};
/**
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 46ef16c3c069..30a40be76930 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -48,9 +48,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
- IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE);
@@ -131,7 +132,7 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.query_engine_hb.engine_idx = engine;
@@ -154,7 +155,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_reset.engine_idx = engine;
@@ -173,7 +174,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id
struct vpu_jsm_msg resp;
int ret;
- if (engine > VPU_ENGINE_COPY)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.engine_preempt.engine_idx = engine;
@@ -196,7 +197,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
- VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ VPU_IPC_CHAN_GEN_CMD, vdev->timeout.jsm);
if (ret)
ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n",
command, ret);
@@ -270,9 +271,8 @@ int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
req.payload.pwr_d0i3_enter.send_response = 1;
- ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
- &resp, VPU_IPC_CHAN_GEN_CMD,
- vdev->timeout.d0i3_entry_msg);
+ ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, &resp,
+ VPU_IPC_CHAN_GEN_CMD, vdev->timeout.d0i3_entry_msg);
if (ret)
return ret;
@@ -346,7 +346,7 @@ int ivpu_jsm_hws_resume_engine(struct ivpu_device *vdev, u32 engine)
struct vpu_jsm_msg resp;
int ret;
- if (engine >= VPU_ENGINE_NB)
+ if (engine != VPU_ENGINE_COMPUTE)
return -EINVAL;
req.payload.hws_resume_engine.engine_idx = engine;
@@ -394,8 +394,6 @@ int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u3
req.payload.hws_set_scheduling_log.host_ssid = host_ssid;
req.payload.hws_set_scheduling_log.vpu_log_buffer_va = vpu_log_buffer_va;
req.payload.hws_set_scheduling_log.notify_index = 0;
- req.payload.hws_set_scheduling_log.enable_extra_events =
- ivpu_test_mode & IVPU_TEST_MODE_HWS_EXTRA_EVENTS;
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
@@ -430,8 +428,8 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev)
req.payload.hws_priority_band_setup.normal_band_percentage = 10;
- ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP,
- &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+ ret = ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP,
+ &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
if (ret)
ivpu_warn_ratelimited(vdev, "Failed to set priority bands: %d\n", ret);
@@ -544,9 +542,8 @@ int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us
req.payload.pwr_dct_control.dct_active_us = active_us;
req.payload.pwr_dct_control.dct_inactive_us = inactive_us;
- return ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE,
- &resp, VPU_IPC_CHAN_ASYNC_CMD,
- vdev->timeout.jsm);
+ return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_ENABLE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
}
int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
@@ -554,7 +551,14 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_DCT_DISABLE };
struct vpu_jsm_msg resp;
- return ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE,
- &resp, VPU_IPC_CHAN_ASYNC_CMD,
- vdev->timeout.jsm);
+ return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp,
+ VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+}
+
+int ivpu_jsm_state_dump(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP };
+
+ return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD,
+ vdev->timeout.state_dump_msg);
}
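
For reference, the request/response shape these helpers share, assembled only from calls and constants visible in the hunks above; ivpu_jsm_example() itself is a hypothetical helper, not part of the patch:

static int ivpu_jsm_example(struct ivpu_device *vdev, u32 engine)
{
	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_ENGINE_RESET };
	struct vpu_jsm_msg resp;

	/* After this patch only the compute engine is accepted. */
	if (engine != VPU_ENGINE_COMPUTE)
		return -EINVAL;

	req.payload.engine_reset.engine_idx = engine;

	/* Send on the async channel and wait for the paired *_DONE response. */
	return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE,
				     &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
}
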
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index e4e42c0ff6e6..9e84d3526a14 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -43,4 +43,6 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas
u64 buffer_size, u32 *sample_size, u64 *info_size);
int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us);
int ivpu_jsm_dct_disable(struct ivpu_device *vdev);
+int ivpu_jsm_state_dump(struct ivpu_device *vdev);
+
#endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index c078e214b221..26ef52fbb93e 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -696,7 +696,7 @@ unlock:
return ret;
}
-static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
+static int ivpu_mmu_cdtab_entry_set(struct ivpu_device *vdev, u32 ssid, u64 cd_dma, bool valid)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
@@ -708,30 +708,29 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
return -EINVAL;
entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
-
- if (cd_dma != 0) {
- cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) |
- FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
- IVPU_MMU_CD_0_TCR_EPD1 |
- IVPU_MMU_CD_0_AA64 |
- IVPU_MMU_CD_0_R |
- IVPU_MMU_CD_0_ASET |
- IVPU_MMU_CD_0_V;
- cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
- cd[2] = 0;
- cd[3] = 0x0000000000007444;
-
- /* For global context generate memory fault on VPU */
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
- cd[0] |= IVPU_MMU_CD_0_A;
- } else {
- memset(cd, 0, sizeof(cd));
- }
+ drm_WARN_ON(&vdev->drm, (entry[0] & IVPU_MMU_CD_0_V) == valid);
+
+ cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) |
+ FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
+ IVPU_MMU_CD_0_TCR_EPD1 |
+ IVPU_MMU_CD_0_AA64 |
+ IVPU_MMU_CD_0_R |
+ IVPU_MMU_CD_0_ASET;
+ cd[1] = cd_dma & IVPU_MMU_CD_1_TTB0_MASK;
+ cd[2] = 0;
+ cd[3] = 0x0000000000007444;
+
+ /* For global context generate memory fault on VPU */
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
+ cd[0] |= IVPU_MMU_CD_0_A;
+
+ if (valid)
+ cd[0] |= IVPU_MMU_CD_0_V;
WRITE_ONCE(entry[1], cd[1]);
WRITE_ONCE(entry[2], cd[2]);
@@ -741,8 +740,8 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
if (!ivpu_is_force_snoop_enabled(vdev))
clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
- ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
- cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
+ ivpu_dbg(vdev, MMU, "CDTAB set %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+ valid ? "valid" : "invalid", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
mutex_lock(&mmu->lock);
if (!mmu->on)
@@ -750,38 +749,18 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
- goto unlock;
+ goto err_invalidate;
ret = ivpu_mmu_cmdq_sync(vdev);
+ if (ret)
+ goto err_invalidate;
unlock:
mutex_unlock(&mmu->lock);
- return ret;
-}
-
-static int ivpu_mmu_cd_add_gbl(struct ivpu_device *vdev)
-{
- int ret;
-
- ret = ivpu_mmu_cd_add(vdev, 0, vdev->gctx.pgtable.pgd_dma);
- if (ret)
- ivpu_err(vdev, "Failed to add global CD entry: %d\n", ret);
-
- return ret;
-}
-
-static int ivpu_mmu_cd_add_user(struct ivpu_device *vdev, u32 ssid, dma_addr_t cd_dma)
-{
- int ret;
-
- if (ssid == 0) {
- ivpu_err(vdev, "Invalid SSID: %u\n", ssid);
- return -EINVAL;
- }
-
- ret = ivpu_mmu_cd_add(vdev, ssid, cd_dma);
- if (ret)
- ivpu_err(vdev, "Failed to add CD entry SSID=%u: %d\n", ssid, ret);
+ return 0;
+err_invalidate:
+ WRITE_ONCE(entry[0], 0);
+ mutex_unlock(&mmu->lock);
return ret;
}
@@ -808,12 +787,6 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
return ret;
}
- ret = ivpu_mmu_cd_add_gbl(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize strtab: %d\n", ret);
- return ret;
- }
-
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
@@ -966,12 +939,12 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val);
}
-int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
+int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
{
- return ivpu_mmu_cd_add_user(vdev, ssid, pgtable->pgd_dma);
+ return ivpu_mmu_cdtab_entry_set(vdev, ssid, pgtable->pgd_dma, true);
}
-void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid)
+void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid)
{
- ivpu_mmu_cd_add_user(vdev, ssid, 0); /* 0 will clear CD entry */
+ ivpu_mmu_cdtab_entry_set(vdev, ssid, 0, false);
}
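
The rewritten ivpu_mmu_cdtab_entry_set() always programs the full descriptor and only toggles the valid bit. A small sketch of that FIELD_PREP/flag composition with stand-in masks (MY_CD_ASID and MY_CD_VALID are illustrative, not the driver's IVPU_MMU_CD_* definitions):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define MY_CD_ASID	GENMASK_ULL(15, 0)	/* hypothetical field */
#define MY_CD_VALID	BIT_ULL(63)		/* hypothetical flag  */

static u64 my_cd_word(u32 ssid, bool valid)
{
	u64 cd = FIELD_PREP(MY_CD_ASID, ssid);

	/* The valid bit is the only part that differs between "set" and
	 * "clear"; everything else is programmed identically.
	 */
	if (valid)
		cd |= MY_CD_VALID;
	return cd;
}
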
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
index 6fa35c240710..7afea9cd8731 100644
--- a/drivers/accel/ivpu/ivpu_mmu.h
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -40,8 +40,8 @@ struct ivpu_mmu_info {
int ivpu_mmu_init(struct ivpu_device *vdev);
void ivpu_mmu_disable(struct ivpu_device *vdev);
int ivpu_mmu_enable(struct ivpu_device *vdev);
-int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
-void ivpu_mmu_clear_pgtable(struct ivpu_device *vdev, int ssid);
+int ivpu_mmu_cd_set(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable);
+void ivpu_mmu_cd_clear(struct ivpu_device *vdev, int ssid);
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index bbe652a7019d..0af614dfb6f9 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -90,19 +90,6 @@ static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_
}
}
-static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
-{
- dma_addr_t pgd_dma;
-
- pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
- if (!pgtable->pgd_dma_ptr)
- return -ENOMEM;
-
- pgtable->pgd_dma = pgd_dma;
-
- return 0;
-}
-
static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_idx, pud_idx, pmd_idx;
@@ -140,6 +127,27 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
}
ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+ pgtable->pgd_dma_ptr = NULL;
+ pgtable->pgd_dma = 0;
+}
+
+static u64*
+ivpu_mmu_ensure_pgd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+ u64 *pgd_dma_ptr = pgtable->pgd_dma_ptr;
+ dma_addr_t pgd_dma;
+
+ if (pgd_dma_ptr)
+ return pgd_dma_ptr;
+
+ pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
+ if (!pgd_dma_ptr)
+ return NULL;
+
+ pgtable->pgd_dma_ptr = pgd_dma_ptr;
+ pgtable->pgd_dma = pgd_dma;
+
+ return pgd_dma_ptr;
}
static u64*
@@ -237,6 +245,12 @@ ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx
int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ drm_WARN_ON(&vdev->drm, ctx->id == IVPU_RESERVED_CONTEXT_MMU_SSID);
+
+ /* Allocate PGD - first level page table if needed */
+ if (!ivpu_mmu_ensure_pgd(vdev, &ctx->pgtable))
+ return -ENOMEM;
+
/* Allocate PUD - second level page table if needed */
if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
return -ENOMEM;
@@ -418,6 +432,7 @@ int
ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
{
+ size_t start_vpu_addr = vpu_addr;
struct scatterlist *sg;
int ret;
u64 prot;
@@ -448,20 +463,36 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
if (ret) {
ivpu_err(vdev, "Failed to map context pages\n");
- mutex_unlock(&ctx->lock);
- return ret;
+ goto err_unmap_pages;
}
vpu_addr += size;
}
+ if (!ctx->is_cd_valid) {
+ ret = ivpu_mmu_cd_set(vdev, ctx->id, &ctx->pgtable);
+ if (ret) {
+ ivpu_err(vdev, "Failed to set context descriptor for context %u: %d\n",
+ ctx->id, ret);
+ goto err_unmap_pages;
+ }
+ ctx->is_cd_valid = true;
+ }
+
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
- mutex_unlock(&ctx->lock);
-
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
- if (ret)
+ if (ret) {
ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
+ goto err_unmap_pages;
+ }
+
+ mutex_unlock(&ctx->lock);
+ return 0;
+
+err_unmap_pages:
+ ivpu_mmu_context_unmap_pages(ctx, start_vpu_addr, vpu_addr - start_vpu_addr);
+ mutex_unlock(&ctx->lock);
return ret;
}
@@ -530,65 +561,79 @@ ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *n
mutex_unlock(&ctx->lock);
}
-static int
-ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
+void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
{
u64 start, end;
- int ret;
mutex_init(&ctx->lock);
- ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
- return ret;
- }
-
if (!context_id) {
start = vdev->hw->ranges.global.start;
end = vdev->hw->ranges.shave.end;
} else {
- start = vdev->hw->ranges.user.start;
- end = vdev->hw->ranges.dma.end;
+ start = min_t(u64, vdev->hw->ranges.user.start, vdev->hw->ranges.shave.start);
+ end = max_t(u64, vdev->hw->ranges.user.end, vdev->hw->ranges.dma.end);
}
drm_mm_init(&ctx->mm, start, end - start);
ctx->id = context_id;
-
- return 0;
}
-static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
+void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
- return;
+ if (ctx->is_cd_valid) {
+ ivpu_mmu_cd_clear(vdev, ctx->id);
+ ctx->is_cd_valid = false;
+ }
mutex_destroy(&ctx->lock);
ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
-
- ctx->pgtable.pgd_dma_ptr = NULL;
- ctx->pgtable.pgd_dma = 0;
}
-int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
+void ivpu_mmu_global_context_init(struct ivpu_device *vdev)
{
- return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
+ ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
}
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
{
- return ivpu_mmu_context_fini(vdev, &vdev->gctx);
+ ivpu_mmu_context_fini(vdev, &vdev->gctx);
}
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
{
- return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
+ int ret;
+
+ ivpu_mmu_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
+
+ mutex_lock(&vdev->rctx.lock);
+
+ if (!ivpu_mmu_ensure_pgd(vdev, &vdev->rctx.pgtable)) {
+ ivpu_err(vdev, "Failed to allocate root page table for reserved context\n");
+ ret = -ENOMEM;
+ goto err_ctx_fini;
+ }
+
+ ret = ivpu_mmu_cd_set(vdev, vdev->rctx.id, &vdev->rctx.pgtable);
+ if (ret) {
+ ivpu_err(vdev, "Failed to set context descriptor for reserved context\n");
+ goto err_ctx_fini;
+ }
+
+ mutex_unlock(&vdev->rctx.lock);
+ return ret;
+
+err_ctx_fini:
+ mutex_unlock(&vdev->rctx.lock);
+ ivpu_mmu_context_fini(vdev, &vdev->rctx);
+ return ret;
}
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
{
- return ivpu_mmu_user_context_fini(vdev, &vdev->rctx);
+ ivpu_mmu_cd_clear(vdev, vdev->rctx.id);
+ ivpu_mmu_context_fini(vdev, &vdev->rctx);
}
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
@@ -603,36 +648,3 @@ void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
xa_unlock(&vdev->context_xa);
}
-
-int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
-{
- int ret;
-
- drm_WARN_ON(&vdev->drm, !ctx_id);
-
- ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
- if (ret) {
- ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret);
- return ret;
- }
-
- ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
- if (ret) {
- ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret);
- goto err_context_fini;
- }
-
- return 0;
-
-err_context_fini:
- ivpu_mmu_context_fini(vdev, ctx);
- return ret;
-}
-
-void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
-{
- drm_WARN_ON(&vdev->drm, !ctx->id);
-
- ivpu_mmu_clear_pgtable(vdev, ctx->id);
- ivpu_mmu_context_fini(vdev, ctx);
-}
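
ivpu_mmu_ensure_pgd() above makes the root page table lazily allocated on first mapping instead of at context creation. The general shape of that "ensure" pattern, with illustrative names and a plain zeroed page standing in for the driver's DMA-coherent page helper (which also records the DMA address):

#include <linux/gfp.h>
#include <linux/types.h>

struct my_table {
	u64 *ptr;	/* NULL until first use */
};

static u64 *my_ensure(struct my_table *t)
{
	if (t->ptr)
		return t->ptr;		/* already allocated */

	t->ptr = (u64 *)get_zeroed_page(GFP_KERNEL);
	return t->ptr;			/* NULL on allocation failure */
}
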
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index 7f9aaf3d10c2..8042fc067062 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,19 +23,20 @@ struct ivpu_mmu_pgtable {
};
struct ivpu_mmu_context {
- struct mutex lock; /* Protects: mm, pgtable */
+ struct mutex lock; /* Protects: mm, pgtable, is_cd_valid */
struct drm_mm mm;
struct ivpu_mmu_pgtable pgtable;
+ bool is_cd_valid;
u32 id;
};
-int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
+void ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id);
+void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
+void ivpu_mmu_global_context_init(struct ivpu_device *vdev);
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);
-int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
-void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index 2f9d37f5c208..ffe7b10f8a76 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -11,7 +11,7 @@
#include "ivpu_ms.h"
#include "ivpu_pm.h"
-#define MS_INFO_BUFFER_SIZE SZ_16K
+#define MS_INFO_BUFFER_SIZE SZ_64K
#define MS_NUM_BUFFERS 2
#define MS_READ_PERIOD_MULTIPLIER 2
#define MS_MIN_SAMPLE_PERIOD_NS 1000000
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 59d3170f5e35..949f4233946c 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -9,21 +9,25 @@
#include <linux/pm_runtime.h>
#include <linux/reboot.h>
-#include "vpu_boot_api.h"
+#include "ivpu_coredump.h"
#include "ivpu_drv.h"
-#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
+#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_ms.h"
#include "ivpu_pm.h"
+#include "ivpu_trace.h"
+#include "vpu_boot_api.h"
static bool ivpu_disable_recovery;
+#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
+#endif
static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
@@ -37,6 +41,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
ivpu_cmdq_reset_all_contexts(vdev);
ivpu_ipc_reset(vdev);
+ ivpu_fw_log_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
}
@@ -123,7 +128,8 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
if (ret)
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
- ivpu_fw_log_dump(vdev);
+ ivpu_jsm_state_dump(vdev);
+ ivpu_dev_coredump(vdev);
atomic_inc(&vdev->pm->reset_counter);
atomic_set(&vdev->pm->reset_pending, 1);
@@ -195,6 +201,7 @@ int ivpu_pm_suspend_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
unsigned long timeout;
+ trace_pm("suspend");
ivpu_dbg(vdev, PM, "Suspend..\n");
timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
@@ -212,6 +219,7 @@ int ivpu_pm_suspend_cb(struct device *dev)
ivpu_pm_prepare_warm_boot(vdev);
ivpu_dbg(vdev, PM, "Suspend done.\n");
+ trace_pm("suspend done");
return 0;
}
@@ -222,6 +230,7 @@ int ivpu_pm_resume_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
+ trace_pm("resume");
ivpu_dbg(vdev, PM, "Resume..\n");
ret = ivpu_resume(vdev);
@@ -229,6 +238,7 @@ int ivpu_pm_resume_cb(struct device *dev)
ivpu_err(vdev, "Failed to resume: %d\n", ret);
ivpu_dbg(vdev, PM, "Resume done.\n");
+ trace_pm("resume done");
return ret;
}
@@ -243,6 +253,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
+ trace_pm("runtime suspend");
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
ivpu_mmu_disable(vdev);
@@ -262,13 +273,14 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
if (!is_idle || ret_d0i3) {
ivpu_err(vdev, "Forcing cold boot due to previous errors\n");
atomic_inc(&vdev->pm->reset_counter);
- ivpu_fw_log_dump(vdev);
+ ivpu_dev_coredump(vdev);
ivpu_pm_prepare_cold_boot(vdev);
} else {
ivpu_pm_prepare_warm_boot(vdev);
}
ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
+ trace_pm("runtime suspend done");
return 0;
}
@@ -279,6 +291,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev)
struct ivpu_device *vdev = to_ivpu_device(drm);
int ret;
+ trace_pm("runtime resume");
ivpu_dbg(vdev, PM, "Runtime resume..\n");
ret = ivpu_resume(vdev);
@@ -286,6 +299,7 @@ int ivpu_pm_runtime_resume_cb(struct device *dev)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
ivpu_dbg(vdev, PM, "Runtime resume done.\n");
+ trace_pm("runtime resume done");
return ret;
}
@@ -364,6 +378,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, delay);
+ pm_runtime_set_active(dev);
ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}
@@ -378,7 +393,6 @@ void ivpu_pm_enable(struct ivpu_device *vdev)
{
struct device *dev = vdev->drm.dev;
- pm_runtime_set_active(dev);
pm_runtime_allow(dev);
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
@@ -411,7 +425,7 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
if (ret) {
- ivpu_err_ratelimited(vdev, "Filed to enable DCT: %d\n", ret);
+ ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret);
return ret;
}
@@ -428,7 +442,7 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev)
ret = ivpu_jsm_dct_disable(vdev);
if (ret) {
- ivpu_err_ratelimited(vdev, "Filed to disable DCT: %d\n", ret);
+ ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
return ret;
}
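
The recovery knob above is now compiled in only on debug builds. The same gating pattern in isolation, with a placeholder Kconfig symbol and parameter name (CONFIG_MY_DRIVER_DEBUG, my_unsafe_knob):

#include <linux/module.h>

static bool my_unsafe_knob;
#if IS_ENABLED(CONFIG_MY_DRIVER_DEBUG)
/* "unsafe" module params taint the kernel when set from userspace. */
module_param_named_unsafe(unsafe_knob, my_unsafe_knob, bool, 0644);
MODULE_PARM_DESC(unsafe_knob, "Example debug-only parameter");
#endif
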
diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c
index 913669f1786e..616477fc17fa 100644
--- a/drivers/accel/ivpu/ivpu_sysfs.c
+++ b/drivers/accel/ivpu/ivpu_sysfs.c
@@ -6,6 +6,8 @@
#include <linux/device.h>
#include <linux/err.h>
+#include "ivpu_drv.h"
+#include "ivpu_fw.h"
#include "ivpu_hw.h"
#include "ivpu_sysfs.h"
@@ -39,8 +41,30 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b
static DEVICE_ATTR_RO(npu_busy_time_us);
+/**
+ * DOC: sched_mode
+ *
+ * The sched_mode is used to report current NPU scheduling mode.
+ *
+ * It returns one of the following strings:
+ * - "HW" - Hardware Scheduler mode
+ * - "OS" - Operating System Scheduler mode
+ *
+ */
+static ssize_t
+sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct ivpu_device *vdev = to_ivpu_device(drm);
+
+ return sysfs_emit(buf, "%s\n", vdev->fw->sched_mode ? "HW" : "OS");
+}
+
+static DEVICE_ATTR_RO(sched_mode);
+
static struct attribute *ivpu_dev_attrs[] = {
&dev_attr_npu_busy_time_us.attr,
+ &dev_attr_sched_mode.attr,
NULL,
};
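
sched_mode follows the standard DEVICE_ATTR_RO() convention: the macro expects a show callback literally named <attr>_show and generates dev_attr_<attr> for the attribute array. A stand-alone sketch with a placeholder attribute name:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t my_attr_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	/* sysfs_emit() bounds the output to a single PAGE_SIZE buffer. */
	return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(my_attr);	/* creates dev_attr_my_attr */
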
diff --git a/drivers/accel/ivpu/ivpu_trace.h b/drivers/accel/ivpu/ivpu_trace.h
new file mode 100644
index 000000000000..eb792038e701
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_trace.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#if !defined(__IVPU_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __IVPU_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include "ivpu_drv.h"
+#include "ivpu_job.h"
+#include "vpu_jsm_api.h"
+#include "ivpu_jsm_msg.h"
+#include "ivpu_ipc.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vpu
+#define TRACE_INCLUDE_FILE ivpu_trace
+
+TRACE_EVENT(pm,
+ TP_PROTO(const char *event),
+ TP_ARGS(event),
+ TP_STRUCT__entry(__field(const char *, event)),
+ TP_fast_assign(__entry->event = event;),
+ TP_printk("%s", __entry->event)
+);
+
+TRACE_EVENT(job,
+ TP_PROTO(const char *event, struct ivpu_job *job),
+ TP_ARGS(event, job),
+ TP_STRUCT__entry(__field(const char *, event)
+ __field(u32, ctx_id)
+ __field(u32, engine_id)
+ __field(u32, job_id)
+ ),
+ TP_fast_assign(__entry->event = event;
+ __entry->ctx_id = job->file_priv->ctx.id;
+ __entry->engine_id = job->engine_idx;
+ __entry->job_id = job->job_id;),
+ TP_printk("%s context:%d engine:%d job:%d",
+ __entry->event,
+ __entry->ctx_id,
+ __entry->engine_id,
+ __entry->job_id)
+);
+
+TRACE_EVENT(jsm,
+ TP_PROTO(const char *event, struct vpu_jsm_msg *msg),
+ TP_ARGS(event, msg),
+ TP_STRUCT__entry(__field(const char *, event)
+ __field(const char *, type)
+ __field(enum vpu_ipc_msg_status, status)
+ __field(u32, request_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(__entry->event = event;
+ __entry->type = ivpu_jsm_msg_type_to_str(msg->type);
+ __entry->status = msg->status;
+ __entry->request_id = msg->request_id;
+ __entry->result = msg->result;),
+ TP_printk("%s type:%s, status:%#x, id:%#x, result:%#x",
+ __entry->event,
+ __entry->type,
+ __entry->status,
+ __entry->request_id,
+ __entry->result)
+);
+
+#endif /* __IVPU_TRACE_H__ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/accel/ivpu/ivpu_trace_points.c b/drivers/accel/ivpu/ivpu_trace_points.c
new file mode 100644
index 000000000000..f8fb99de0de3
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_trace_points.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2024 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "ivpu_trace.h"
+#endif
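
The two new files follow the usual tracepoint split: a header that may be included from many translation units, and exactly one .c file that defines CREATE_TRACE_POINTS before including it, so the tracepoint bodies are emitted once. A minimal sketch of that arrangement with placeholder names (my_trace.h, my_driver, my_event):

/* my_trace.h */
#if !defined(__MY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
#define __MY_TRACE_H__

#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM my_driver

TRACE_EVENT(my_event,
	TP_PROTO(const char *what),
	TP_ARGS(what),
	TP_STRUCT__entry(__field(const char *, what)),
	TP_fast_assign(__entry->what = what;),
	TP_printk("%s", __entry->what)
);

#endif /* __MY_TRACE_H__ */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE my_trace
#include <trace/define_trace.h>

/* my_trace_points.c - the only file that emits the tracepoint definitions */
#define CREATE_TRACE_POINTS
#include "my_trace.h"
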
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 82954b91b748..908e68ea1c39 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (c) 2020-2023, Intel Corporation.
+ * Copyright (c) 2020-2024, Intel Corporation.
*/
#ifndef VPU_BOOT_API_H
#define VPU_BOOT_API_H
/*
- * =========== FW API version information beginning ================
- * The bellow values will be used to construct the version info this way:
+ * The below values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
* VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
@@ -27,19 +26,18 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 24
+#define VPU_BOOT_API_VER_MINOR 26
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 0
+#define VPU_BOOT_API_VER_PATCH 3
/*
* Index in the API version table
* Must be unique for each API
*/
#define VPU_BOOT_API_VER_INDEX 0
-/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 4)
@@ -164,8 +162,6 @@ enum vpu_trace_destination {
/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST
-#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config {
u8 use;
@@ -199,6 +195,17 @@ struct vpu_warm_boot_section {
*/
#define POWER_PROFILE_SURVIVABILITY 0x1
+/**
+ * Enum for dvfs_mode boot param.
+ */
+enum vpu_governor {
+ VPU_GOV_DEFAULT = 0, /* Default Governor for the system */
+ VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */
+ VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */
+ VPU_GOV_POWER_SAVE = 3, /* Power save governor */
+ VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */
+};
+
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
@@ -301,7 +308,14 @@ struct vpu_boot_params {
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
- /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+ /**
+ * DVFS Mode:
+ * 0 - Default, DVFS mode selected by the firmware
+ * 1 - Max Performance
+ * 2 - On Demand
+ * 3 - Power Save
+ * 4 - On Demand Priority Aware
+ */
u32 dvfs_mode;
/**
* Depending on DVFS Mode:
@@ -332,8 +346,8 @@ struct vpu_boot_params {
u64 d0i3_entry_vpu_ts;
/*
* The system time of the host operating system in microseconds.
- * E.g the number of microseconds since 1st of January 1970, or whatever date the
- * host operating system uses to maintain system time.
+ * E.g the number of microseconds since 1st of January 1970, or whatever
+ * date the host operating system uses to maintain system time.
* This value will be used to track system time on the VPU.
* The KMD is required to update this value on every VPU reset.
*/
@@ -382,10 +396,7 @@ struct vpu_boot_params {
u32 pad6[734];
};
-/*
- * Magic numbers set between host and vpu to detect corruptio of tracing init
- */
-
+/* Magic numbers set between host and vpu to detect corruption of tracing init */
#define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
/* Tracing buffer message format definitions */
@@ -405,7 +416,9 @@ struct vpu_tracing_buffer_header {
u32 host_canary_start;
/* offset from start of buffer for trace entries */
u32 read_index;
- u32 pad_to_cache_line_size_0[14];
+ /* keeps track of wrapping on the reader side */
+ u32 read_wrap_count;
+ u32 pad_to_cache_line_size_0[13];
/* End of first cache line */
/**
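
The new vpu_governor enum gives symbolic names to the dvfs_mode values documented above. An illustrative use when filling boot parameters; the wrapper function is hypothetical:

static void my_set_governor(struct vpu_boot_params *bp, bool low_power)
{
	/* dvfs_mode takes one of the enum vpu_governor values. */
	bp->dvfs_mode = low_power ? VPU_GOV_POWER_SAVE : VPU_GOV_DEFAULT;
}
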
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 33f462b1a25d..7215c144158c 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,7 +22,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 16
+#define VPU_JSM_API_VER_MINOR 25
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@@ -36,7 +36,7 @@
/*
* Number of Priority Bands for Hardware Scheduling
- * Bands: RealTime, Focus, Normal, Idle
+ * Bands: Idle(0), Normal(1), Focus(2), RealTime(3)
*/
#define VPU_HWS_NUM_PRIORITY_BANDS 4
@@ -74,6 +74,7 @@
#define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU
/* Job status returned when the job was preempted mid-inference */
#define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU
+#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
/*
* Host <-> VPU IPC channels.
@@ -86,18 +87,58 @@
/*
* Job flags bit masks.
*/
-#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
-#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000
+enum {
+ /*
+ * Null submission mask.
+ * When set, batch buffer's commands are not processed but returned as
+ * successful immediately, except fences and timestamps.
+ * When cleared, batch buffer's commands are processed normally.
+ * Used for testing and profiling purposes.
+ */
+ VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U),
+ /*
+ * Inline command mask.
+ * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below).
+ * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below).
+ */
+ VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U),
+ /*
+ * VPU private data mask.
+ * Reserved for the VPU to store private data about the job (or inline command)
+ * while being processed.
+ */
+ VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U
+};
/*
- * Sizes of the reserved areas in jobs, in bytes.
+ * Job queue flags bit masks.
*/
-#define VPU_JOB_RESERVED_BYTES 8
+enum {
+ /*
+ * No job done notification mask.
+ * When set, indicates that no job done notification should be sent for any
+ * job from this queue. When cleared, indicates that job done notification
+ * should be sent for every job completed from this queue.
+ */
+ VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U),
+ /*
+ * Native fence usage mask.
+ * When set, indicates that job queue uses native fences (as inline commands
+ * in job queue). Such queues may also use legacy fences (as commands in batch buffers).
+ * When cleared, indicates the job queue only uses legacy fences.
+ * NOTE: For queues using native fences, VPU expects that all jobs in the queue
+ * are immediately followed by an inline command object. This object is expected
+ * to be a fence signal command in most cases, but can also be a NOP in case the host
+ * does not need per-job fence signalling. Other inline commands objects can be
+ * inserted between "job and inline command" pairs.
+ */
+ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
-/*
- * Sizes of the reserved areas in job queues, in bytes.
- */
-#define VPU_JOB_QUEUE_RESERVED_BYTES 52
+ /*
+ * Enable turbo mode for testing NPU performance; not recommended for regular usage.
+ */
+ VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U)
+};
/*
* Max length (including trailing NULL char) of trace entity name (e.g., the
@@ -141,23 +182,112 @@
#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
/*
+ * Inline commands types.
+ */
+/*
+ * NOP.
+ * VPU does nothing other than consuming the inline command object.
+ */
+#define VPU_INLINE_CMD_TYPE_NOP 0x0
+/*
+ * Fence wait.
+ * VPU waits for the fence current value to reach monitored value.
+ * Fence wait operations are executed upon job dispatching. While waiting for
+ * the fence to be satisfied, VPU blocks fetching of the next objects in the queue.
+ * Jobs present in the queue prior to the fence wait object may be processed
+ * concurrently.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1
+/*
+ * Fence signal.
+ * VPU sets the fence current value to the provided value. If new current value
+ * is equal to or higher than monitored value, VPU sends fence signalled notification
+ * to the host. Fence signal operations are executed upon completion of all the jobs
+ * present in the queue prior to them, and in-order relative to each other in the queue.
+ * But jobs in-between them may be processed concurrently and may complete out-of-order.
+ */
+#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2
+
+/*
+ * Job scheduling priority bands for both hardware scheduling and OS scheduling.
+ */
+enum vpu_job_scheduling_priority_band {
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3,
+ VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4,
+};
+
+/*
* Job format.
+ * Jobs define the actual workloads to be executed by a given engine.
*/
struct vpu_job_queue_entry {
- u64 batch_buf_addr; /**< Address of VPU commands batch buffer */
- u32 job_id; /**< Job ID */
- u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
- u64 root_page_table_addr; /**< Address of root page table to use for this job */
- u64 root_page_table_update_counter; /**< Page tables update events counter */
- u64 primary_preempt_buf_addr;
+ /**< Address of VPU commands batch buffer */
+ u64 batch_buf_addr;
+ /**< Job ID */
+ u32 job_id;
+ /**< Flags bit field, see VPU_JOB_FLAGS_* above */
+ u32 flags;
+ /**
+ * Doorbell ring timestamp taken by KMD from SoC's global system clock, in
+ * microseconds. NPU can convert this value to its own fixed clock's timebase,
+ * to match other profiling timestamps.
+ */
+ u64 doorbell_timestamp;
+ /**< Extra id for job tracking, used only in the firmware perf traces */
+ u64 host_tracking_id;
/**< Address of the primary preemption buffer to use for this job */
- u32 primary_preempt_buf_size;
+ u64 primary_preempt_buf_addr;
/**< Size of the primary preemption buffer to use for this job */
- u32 secondary_preempt_buf_size;
+ u32 primary_preempt_buf_size;
/**< Size of secondary preemption buffer to use for this job */
- u64 secondary_preempt_buf_addr;
+ u32 secondary_preempt_buf_size;
/**< Address of secondary preemption buffer to use for this job */
- u8 reserved_0[VPU_JOB_RESERVED_BYTES];
+ u64 secondary_preempt_buf_addr;
+ u64 reserved_0;
+};
+
+/*
+ * Inline command format.
+ * Inline commands are the commands executed at scheduler level (typically,
+ * synchronization directives). Inline command and job objects must be of
+ * the same size and have flags field at same offset.
+ */
+struct vpu_inline_cmd {
+ u64 reserved_0;
+ /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */
+ u32 type;
+ /* Flags bit field, see VPU_JOB_FLAGS_* above. */
+ u32 flags;
+ /* Inline command payload. Depends on inline command type. */
+ union {
+ /* Fence (wait and signal) commands' payload. */
+ struct {
+ /* Fence object handle. */
+ u64 fence_handle;
+ /* User VA of the current fence value. */
+ u64 current_value_va;
+ /* User VA of the monitored fence value (read-only). */
+ u64 monitored_value_va;
+ /* Value to wait for or write in fence location. */
+ u64 value;
+ /* User VA of the log buffer in which to add log entry on completion. */
+ u64 log_buffer_va;
+ } fence;
+ /* Other commands do not have a payload. */
+ /* Payload definition for future inline commands can be inserted here. */
+ u64 reserved_1[6];
+ } payload;
+};
+
+/*
+ * Job queue slots can be populated either with job objects or inline command objects.
+ */
+union vpu_jobq_slot {
+ struct vpu_job_queue_entry job;
+ struct vpu_inline_cmd inline_cmd;
};
/*
@@ -167,7 +297,21 @@ struct vpu_job_queue_header {
u32 engine_idx;
u32 head;
u32 tail;
- u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES];
+ u32 flags;
+ /* Set to 1 to indicate priority_band field is valid */
+ u32 priority_band_valid;
+ /*
+ * Priority for the work of this job queue, valid only if the HWS is NOT used
+ * and the `priority_band_valid` is set to 1. It is applied only during
+ * the VPU_JSM_MSG_REGISTER_DB message processing.
+ * The device firmware might use the `priority_band` to optimize the power
+ * management logic, but it will not affect the order of jobs.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
+ u32 priority_band;
+ /* Inside realtime band assigns a further priority, limited to 0..31 range */
+ u32 realtime_priority_level;
+ u32 reserved_0[9];
};
/*
@@ -175,7 +319,7 @@ struct vpu_job_queue_header {
*/
struct vpu_job_queue {
struct vpu_job_queue_header header;
- struct vpu_job_queue_entry job[];
+ union vpu_jobq_slot slot[];
};
/**
@@ -197,9 +341,7 @@ enum vpu_trace_entity_type {
struct vpu_hws_log_buffer_header {
/* Written by VPU after adding a log entry. Initialised by host to 0. */
u32 first_free_entry_index;
- /* Incremented by VPU every time the VPU overwrites the 0th entry;
- * initialised by host to 0.
- */
+ /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */
u32 wraparound_count;
/*
* This is the number of buffers that can be stored in the log buffer provided by the host.
@@ -230,14 +372,80 @@ struct vpu_hws_log_buffer_entry {
u64 operation_data[2];
};
+/* Native fence log buffer types. */
+enum vpu_hws_native_fence_log_type {
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1,
+ VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2
+};
+
+/* HWS native fence log buffer header. */
+struct vpu_hws_native_fence_log_header {
+ union {
+ struct {
+ /* Index of the first free entry in buffer. */
+ u32 first_free_entry_idx;
+ /* Incremented each time NPU wraps around the buffer to write next entry. */
+ u32 wraparound_count;
+ };
+ /* Field allowing atomic update of both fields above. */
+ u64 atomic_wraparound_and_entry_idx;
+ };
+ /* Log buffer type, see enum vpu_hws_native_fence_log_type. */
+ u64 type;
+ /* Allocated number of entries in the log buffer. */
+ u64 entry_nb;
+ u64 reserved[2];
+};
+
+/* Native fence log operation types. */
+enum vpu_hws_native_fence_log_op {
+ VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0,
+ VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1
+};
+
+/* HWS native fence log entry. */
+struct vpu_hws_native_fence_log_entry {
+ /* Newly signaled/unblocked fence value. */
+ u64 fence_value;
+ /* Native fence object handle to which this operation belongs. */
+ u64 fence_handle;
+ /* Operation type, see enum vpu_hws_native_fence_log_op. */
+ u64 op_type;
+ u64 reserved_0;
+ /*
+ * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence
+ * wait was started (in NPU SysTime).
+ */
+ u64 fence_wait_start_ts;
+ u64 reserved_1;
+ /* Timestamp at which fence operation was completed (in NPU SysTime). */
+ u64 fence_end_ts;
+};
+
+/* Native fence log buffer. */
+struct vpu_hws_native_fence_log_buffer {
+ struct vpu_hws_native_fence_log_header header;
+ struct vpu_hws_native_fence_log_entry entry[];
+};
+
/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF,
+
/* IPC Host -> Device, Async commands */
VPU_JSM_MSG_ASYNC_CMD = 0x1100,
VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD,
+ /**
+ * Preempt engine. The NPU stops (preempts) all the jobs currently
+ * executing on the target engine making the engine become idle and ready to
+ * execute new jobs.
+ * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of
+ * the target engine, but it stops processing them (until the queue doorbell
+ * is rung again); the host is responsible to reset the job queue, either
+ * after preemption or when resubmitting jobs to the queue.
+ */
VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101,
VPU_JSM_MSG_REGISTER_DB = 0x1102,
VPU_JSM_MSG_UNREGISTER_DB = 0x1103,
@@ -323,9 +531,10 @@ enum vpu_ipc_msg_type {
* NOTE: Please introduce new ASYNC commands before this one. *
*/
VPU_JSM_MSG_STATE_DUMP = 0x11FF,
+
/* IPC Host -> Device, General commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
- VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
+ VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD,
/**
* Control dyndbg behavior by executing a dyndbg command; equivalent to
* Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
@@ -335,8 +544,12 @@ enum vpu_ipc_msg_type {
* Perform the save procedure for the D0i3 entry
*/
VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
+
/* IPC Device -> Host, Job completion */
VPU_JSM_MSG_JOB_DONE = 0x2100,
+ /* IPC Device -> Host, Fence signalled */
+ VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101,
+
/* IPC Device -> Host, Async command completion */
VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200,
VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE,
@@ -422,6 +635,7 @@ enum vpu_ipc_msg_type {
* NOTE: Please introduce new ASYNC responses before this one. *
*/
VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
+
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
@@ -600,11 +814,6 @@ struct vpu_jsm_metric_streamer_update {
u64 next_buffer_size;
};
-struct vpu_ipc_msg_payload_blob_deinit {
- /* 64-bit unique ID for the blob to be de-initialized. */
- u64 blob_id;
-};
-
struct vpu_ipc_msg_payload_job_done {
/* Engine to which the job was submitted. */
u32 engine_idx;
@@ -622,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done {
u64 cmdq_id;
};
+/*
+ * Notification message upon native fence signalling.
+ * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED
+ */
+struct vpu_ipc_msg_payload_native_fence_signalled {
+ /* Engine ID. */
+ u32 engine_idx;
+ /* Host SSID. */
+ u32 host_ssid;
+ /* CMDQ ID */
+ u64 cmdq_id;
+ /* Fence object handle. */
+ u64 fence_handle;
+};
+
struct vpu_jsm_engine_reset_context {
/* Host SSID */
u32 host_ssid;
@@ -700,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done {
u8 power_limit[16];
};
-struct vpu_ipc_msg_payload_blob_deinit_done {
- /* 64-bit unique ID for the blob de-initialized. */
- u64 blob_id;
-};
-
/* HWS priority band setup request / response */
struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
@@ -794,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
u32 reserved_0;
/* Command queue id */
u64 cmdq_id;
- /* Priority band to assign to work of this context */
+ /*
+ * Priority band to assign to work of this context.
+ * Available priority bands: @see enum vpu_job_scheduling_priority_band
+ */
u32 priority_band;
/* Inside realtime band assigns a further priority */
u32 realtime_priority_level;
@@ -869,9 +1091,7 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log {
*/
u64 notify_index;
/*
- * Enable extra events to be output to log for debug of scheduling algorithm.
- * Interpreted by VPU as a boolean to enable or disable, expected values are
- * 0 and 1.
+ * This field is now deprecated and will be removed once the KMD is updated to support its removal.
*/
u32 enable_extra_events;
/* Zero Padding */
@@ -1243,10 +1463,10 @@ union vpu_ipc_msg_payload {
struct vpu_jsm_metric_streamer_start metric_streamer_start;
struct vpu_jsm_metric_streamer_stop metric_streamer_stop;
struct vpu_jsm_metric_streamer_update metric_streamer_update;
- struct vpu_ipc_msg_payload_blob_deinit blob_deinit;
struct vpu_ipc_msg_payload_ssid_release ssid_release;
struct vpu_jsm_hws_register_db hws_register_db;
struct vpu_ipc_msg_payload_job_done job_done;
+ struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled;
struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done;
struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done;
struct vpu_ipc_msg_payload_register_db_done register_db_done;
@@ -1254,7 +1474,6 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done;
struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done;
struct vpu_jsm_metric_streamer_done metric_streamer_done;
- struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done;
struct vpu_ipc_msg_payload_trace_config trace_config;
struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability;
struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
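
With the new union vpu_jobq_slot, a queue slot can carry either a job or an inline command such as a fence signal. A sketch of filling one slot as a fence-signal command, using only types and flags from this header; the function, queue pointer and addresses are placeholders, and a real producer would still publish the new tail and ring the doorbell with the appropriate barriers:

static void my_queue_fence_signal(struct vpu_job_queue *q, u32 tail,
				  u64 fence_handle, u64 value_va, u64 value)
{
	struct vpu_inline_cmd *cmd = &q->slot[tail].inline_cmd;

	cmd->type = VPU_INLINE_CMD_TYPE_FENCE_SIGNAL;
	cmd->flags = VPU_JOB_FLAGS_INLINE_CMD_MASK;	/* slot holds a command, not a job */
	cmd->payload.fence.fence_handle = fence_handle;
	cmd->payload.fence.current_value_va = value_va;
	cmd->payload.fence.value = value;
}
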
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index ada9b1eb0787..8ab82e78dd94 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -405,6 +405,38 @@ static const struct mhi_channel_config aic100_channels[] = {
.auto_queue = false,
.wake_capable = false,
},
+ {
+ .name = "IPCR",
+ .num = 24,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "IPCR",
+ .num = 25,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = true,
+ .wake_capable = false,
+ },
};
static struct mhi_event_config aic100_events[] = {
diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c
index 20b653d99e52..ba0cf2f94732 100644
--- a/drivers/accel/qaic/qaic_debugfs.c
+++ b/drivers/accel/qaic/qaic_debugfs.c
@@ -64,20 +64,9 @@ static int bootlog_show(struct seq_file *s, void *unused)
return 0;
}
-static int bootlog_fops_open(struct inode *inode, struct file *file)
-{
- return single_open(file, bootlog_show, inode->i_private);
-}
-
-static const struct file_operations bootlog_fops = {
- .owner = THIS_MODULE,
- .open = bootlog_fops_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(bootlog);
-static int read_dbc_fifo_size(struct seq_file *s, void *unused)
+static int fifo_size_show(struct seq_file *s, void *unused)
{
struct dma_bridge_chan *dbc = s->private;
@@ -85,20 +74,9 @@ static int read_dbc_fifo_size(struct seq_file *s, void *unused)
return 0;
}
-static int fifo_size_open(struct inode *inode, struct file *file)
-{
- return single_open(file, read_dbc_fifo_size, inode->i_private);
-}
-
-static const struct file_operations fifo_size_fops = {
- .owner = THIS_MODULE,
- .open = fifo_size_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(fifo_size);
-static int read_dbc_queued(struct seq_file *s, void *unused)
+static int queued_show(struct seq_file *s, void *unused)
{
struct dma_bridge_chan *dbc = s->private;
u32 tail = 0, head = 0;
@@ -115,18 +93,7 @@ static int read_dbc_queued(struct seq_file *s, void *unused)
return 0;
}
-static int queued_open(struct inode *inode, struct file *file)
-{
- return single_open(file, read_dbc_queued, inode->i_private);
-}
-
-static const struct file_operations queued_fops = {
- .owner = THIS_MODULE,
- .open = queued_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(queued);
void qaic_debugfs_init(struct qaic_drm_device *qddev)
{
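The three hand-rolled single_open()/file_operations trios are folded into DEFINE_SHOW_ATTRIBUTE(), which is why the show callbacks are renamed to bootlog_show(), fifo_size_show() and queued_show(): the macro derives the other identifiers from that name. Roughly, and paraphrasing include/linux/seq_file.h, DEFINE_SHOW_ATTRIBUTE(bootlog) expands to essentially the code deleted above:

static int bootlog_open(struct inode *inode, struct file *file)
{
	return single_open(file, bootlog_show, inode->i_private);
}

static const struct file_operations bootlog_fops = {
	.owner   = THIS_MODULE,
	.open    = bootlog_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

Because the generated fops keep the old bootlog_fops/fifo_size_fops/queued_fops names, qaic_debugfs_init() can keep passing them to debugfs_create_file() unchanged.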
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index bf10156c334e..4ddf89308ff5 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -32,8 +32,9 @@
#include "qaic_timesync.h"
#include "sahara.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
+#define PCI_DEV_AIC080 0xa080
#define PCI_DEV_AIC100 0xa100
#define QAIC_NAME "qaic"
#define QAIC_DESC "Qualcomm Cloud AI Accelerators"
@@ -53,12 +54,12 @@ static void qaicm_wq_release(struct drm_device *dev, void *res)
destroy_workqueue(wq);
}
-static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *fmt)
+static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *name)
{
struct workqueue_struct *wq;
int ret;
- wq = alloc_workqueue(fmt, WQ_UNBOUND, 0);
+ wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name);
if (!wq)
return ERR_PTR(-ENOMEM);
ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq);
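The alloc_workqueue() change is a format-string hardening fix rather than a simple rename: the old code handed the caller's string straight to alloc_workqueue() as the printf-style format, so a stray '%' in it would be interpreted as a conversion specifier. Routing it through a literal "%s" keeps the resulting workqueue name identical while avoiding -Wformat-security style warnings. Side by side:

/* Before: caller-controlled string used directly as the format. */
wq = alloc_workqueue(fmt, WQ_UNBOUND, 0);

/* After: the string is only ever substituted into a literal "%s". */
wq = alloc_workqueue("%s", WQ_UNBOUND, 0, name);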
@@ -365,7 +366,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
return NULL;
qdev->dev_state = QAIC_OFFLINE;
- if (id->device == PCI_DEV_AIC100) {
+ if (id->device == PCI_DEV_AIC080 || id->device == PCI_DEV_AIC100) {
qdev->num_dbc = 16;
qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
if (!qdev->dbc)
@@ -607,6 +608,7 @@ static struct mhi_driver qaic_mhi_driver = {
};
static const struct pci_device_id qaic_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC080), },
{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
{ }
};
diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index bf94bbab6be5..6d772143d612 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
+#include <linux/devcoredump.h>
#include <linux/firmware.h>
#include <linux/limits.h>
#include <linux/mhi.h>
@@ -9,6 +10,7 @@
#include <linux/mod_devicetable.h>
#include <linux/overflow.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include "sahara.h"
@@ -36,12 +38,14 @@
#define SAHARA_PACKET_MAX_SIZE 0xffffU /* MHI_MAX_MTU */
#define SAHARA_TRANSFER_MAX_SIZE 0x80000
+#define SAHARA_READ_MAX_SIZE 0xfff0U /* Avoid unaligned requests */
#define SAHARA_NUM_TX_BUF DIV_ROUND_UP(SAHARA_TRANSFER_MAX_SIZE,\
SAHARA_PACKET_MAX_SIZE)
#define SAHARA_IMAGE_ID_NONE U32_MAX
#define SAHARA_VERSION 2
#define SAHARA_SUCCESS 0
+#define SAHARA_TABLE_ENTRY_STR_LEN 20
#define SAHARA_MODE_IMAGE_TX_PENDING 0x0
#define SAHARA_MODE_IMAGE_TX_COMPLETE 0x1
@@ -53,6 +57,8 @@
#define SAHARA_END_OF_IMAGE_LENGTH 0x10
#define SAHARA_DONE_LENGTH 0x8
#define SAHARA_RESET_LENGTH 0x8
+#define SAHARA_MEM_DEBUG64_LENGTH 0x18
+#define SAHARA_MEM_READ64_LENGTH 0x18
struct sahara_packet {
__le32 cmd;
@@ -80,18 +86,95 @@ struct sahara_packet {
__le32 image;
__le32 status;
} end_of_image;
+ struct {
+ __le64 table_address;
+ __le64 table_length;
+ } memory_debug64;
+ struct {
+ __le64 memory_address;
+ __le64 memory_length;
+ } memory_read64;
};
};
+struct sahara_debug_table_entry64 {
+ __le64 type;
+ __le64 address;
+ __le64 length;
+ char description[SAHARA_TABLE_ENTRY_STR_LEN];
+ char filename[SAHARA_TABLE_ENTRY_STR_LEN];
+};
+
+struct sahara_dump_table_entry {
+ u64 type;
+ u64 address;
+ u64 length;
+ char description[SAHARA_TABLE_ENTRY_STR_LEN];
+ char filename[SAHARA_TABLE_ENTRY_STR_LEN];
+};
+
+#define SAHARA_DUMP_V1_MAGIC 0x1234567890abcdef
+#define SAHARA_DUMP_V1_VER 1
+struct sahara_memory_dump_meta_v1 {
+ u64 magic;
+ u64 version;
+ u64 dump_size;
+ u64 table_size;
+};
+
+/*
+ * Layout of crashdump provided to user via devcoredump
+ * +------------------------------------------+
+ * | Crashdump Meta structure |
+ * | type: struct sahara_memory_dump_meta_v1 |
+ * +------------------------------------------+
+ * | Crashdump Table |
+ * | type: array of struct |
+ * | sahara_dump_table_entry |
+ * | |
+ * | |
+ * +------------------------------------------+
+ * | Crashdump |
+ * | |
+ * | |
+ * | |
+ * | |
+ * | |
+ * +------------------------------------------+
+ *
+ * First is the metadata header. Userspace can use the magic number to verify
+ * the content type, and then check the version for the rest of the format.
+ * New versions should keep the magic number location/value, and version
+ * location, but increment the version value.
+ *
+ * For v1, the metadata lists the size of the entire dump (header + table +
+ * dump) and the size of the table. The dump image table follows, describing
+ * the contents of the dump. Finally, all the images are laid out in order,
+ * with no dead space in between. Userspace can use the sizes listed in the
+ * image table to reconstruct the individual images.
+ */
+
struct sahara_context {
struct sahara_packet *tx[SAHARA_NUM_TX_BUF];
struct sahara_packet *rx;
- struct work_struct work;
+ struct work_struct fw_work;
+ struct work_struct dump_work;
struct mhi_device *mhi_dev;
const char **image_table;
u32 table_size;
u32 active_image_id;
const struct firmware *firmware;
+ u64 dump_table_address;
+ u64 dump_table_length;
+ size_t rx_size;
+ size_t rx_size_requested;
+ void *mem_dump;
+ size_t mem_dump_sz;
+ struct sahara_dump_table_entry *dump_image;
+ u64 dump_image_offset;
+ void *mem_dump_freespace;
+ u64 dump_images_left;
+ bool is_mem_dump_mode;
};
static const char *aic100_image_table[] = {
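For illustration only (not part of the patch): a minimal sketch of how a userspace consumer could walk a v1 dump after reading the devcoredump blob into memory. It assumes the struct definitions above are available to the tool and uses a hypothetical write_image_file() helper; since both table-entry formats are 64 bytes, meta->table_size divides evenly by either entry size.

/* Hypothetical userspace parser for the v1 layout described above. */
static int parse_sahara_dump_v1(const void *buf, size_t len)
{
	const struct sahara_memory_dump_meta_v1 *meta = buf;
	const struct sahara_dump_table_entry *table;
	const unsigned char *image;
	u64 i, nents;

	if (len < sizeof(*meta) || meta->magic != SAHARA_DUMP_V1_MAGIC ||
	    meta->version != SAHARA_DUMP_V1_VER || meta->dump_size > len)
		return -1;

	nents = meta->table_size / sizeof(*table);
	table = (const void *)(meta + 1);
	image = (const unsigned char *)(table + nents); /* images follow the table */

	for (i = 0; i < nents; i++) {
		/* write_image_file() is a hypothetical helper, not kernel API */
		write_image_file(table[i].filename, image, table[i].length);
		image += table[i].length;
	}
	return 0;
}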
@@ -153,6 +236,8 @@ static void sahara_send_reset(struct sahara_context *context)
{
int ret;
+ context->is_mem_dump_mode = false;
+
context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -186,7 +271,8 @@ static void sahara_hello(struct sahara_context *context)
}
if (le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_PENDING &&
- le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE) {
+ le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_IMAGE_TX_COMPLETE &&
+ le32_to_cpu(context->rx->hello.mode) != SAHARA_MODE_MEMORY_DEBUG) {
dev_err(&context->mhi_dev->dev, "Unsupported hello packet - mode %d\n",
le32_to_cpu(context->rx->hello.mode));
return;
@@ -320,9 +406,70 @@ static void sahara_end_of_image(struct sahara_context *context)
dev_dbg(&context->mhi_dev->dev, "Unable to send done response %d\n", ret);
}
+static void sahara_memory_debug64(struct sahara_context *context)
+{
+ int ret;
+
+ dev_dbg(&context->mhi_dev->dev,
+ "MEMORY DEBUG64 cmd received. length:%d table_address:%#llx table_length:%#llx\n",
+ le32_to_cpu(context->rx->length),
+ le64_to_cpu(context->rx->memory_debug64.table_address),
+ le64_to_cpu(context->rx->memory_debug64.table_length));
+
+ if (le32_to_cpu(context->rx->length) != SAHARA_MEM_DEBUG64_LENGTH) {
+ dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - length %d\n",
+ le32_to_cpu(context->rx->length));
+ return;
+ }
+
+ context->dump_table_address = le64_to_cpu(context->rx->memory_debug64.table_address);
+ context->dump_table_length = le64_to_cpu(context->rx->memory_debug64.table_length);
+
+ if (context->dump_table_length % sizeof(struct sahara_debug_table_entry64) != 0 ||
+ !context->dump_table_length) {
+ dev_err(&context->mhi_dev->dev, "Malformed memory debug64 packet - table length %lld\n",
+ context->dump_table_length);
+ return;
+ }
+
+ /*
+ * From this point, the protocol flips. We make memory_read requests to
+ * the device, and the device responds with the raw data. If the device
+ * has an error, it will send an End of Image command. First we need to
+	 * request the memory dump table so that we know where all the pieces of
+	 * the dump that we need to consume are located.
+ */
+
+ context->is_mem_dump_mode = true;
+
+ /*
+ * Assume that the table is smaller than our MTU so that we can read it
+ * in one shot. The spec does not put an upper limit on the table, but
+ * no known device will exceed this.
+ */
+ if (context->dump_table_length > SAHARA_PACKET_MAX_SIZE) {
+ dev_err(&context->mhi_dev->dev, "Memory dump table length %lld exceeds supported size. Discarding dump\n",
+ context->dump_table_length);
+ sahara_send_reset(context);
+ return;
+ }
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_table_address);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(context->dump_table_length);
+
+ context->rx_size_requested = context->dump_table_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to send read for dump table %d\n", ret);
+}
+
static void sahara_processing(struct work_struct *work)
{
- struct sahara_context *context = container_of(work, struct sahara_context, work);
+ struct sahara_context *context = container_of(work, struct sahara_context, fw_work);
int ret;
switch (le32_to_cpu(context->rx->cmd)) {
@@ -338,6 +485,12 @@ static void sahara_processing(struct work_struct *work)
case SAHARA_DONE_RESP_CMD:
/* Intentional do nothing as we don't need to exit an app */
break;
+		/* Intentional do nothing as a reset ack needs no handling */
+	case SAHARA_RESET_RESP_CMD:
+ break;
+ case SAHARA_MEM_DEBUG64_CMD:
+ sahara_memory_debug64(context);
+ break;
default:
dev_err(&context->mhi_dev->dev, "Unknown command %d\n",
le32_to_cpu(context->rx->cmd));
@@ -350,6 +503,217 @@ static void sahara_processing(struct work_struct *work)
dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret);
}
+static void sahara_parse_dump_table(struct sahara_context *context)
+{
+ struct sahara_dump_table_entry *image_out_table;
+ struct sahara_debug_table_entry64 *dev_table;
+ struct sahara_memory_dump_meta_v1 *dump_meta;
+ u64 table_nents;
+ u64 dump_length;
+ int ret;
+ u64 i;
+
+ table_nents = context->dump_table_length / sizeof(*dev_table);
+ context->dump_images_left = table_nents;
+ dump_length = 0;
+
+ dev_table = (struct sahara_debug_table_entry64 *)(context->rx);
+ for (i = 0; i < table_nents; ++i) {
+		/* Do not trust the device; ensure the strings are terminated */
+ dev_table[i].description[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
+ dev_table[i].filename[SAHARA_TABLE_ENTRY_STR_LEN - 1] = 0;
+
+ dump_length = size_add(dump_length, le64_to_cpu(dev_table[i].length));
+ if (dump_length == SIZE_MAX) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ dev_dbg(&context->mhi_dev->dev,
+ "Memory dump table entry %lld type: %lld address: %#llx length: %#llx description: \"%s\" filename \"%s\"\n",
+ i,
+ le64_to_cpu(dev_table[i].type),
+ le64_to_cpu(dev_table[i].address),
+ le64_to_cpu(dev_table[i].length),
+ dev_table[i].description,
+ dev_table[i].filename);
+ }
+
+ dump_length = size_add(dump_length, sizeof(*dump_meta));
+ if (dump_length == SIZE_MAX) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+ dump_length = size_add(dump_length, size_mul(sizeof(*image_out_table), table_nents));
+ if (dump_length == SIZE_MAX) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ context->mem_dump_sz = dump_length;
+ context->mem_dump = vzalloc(dump_length);
+ if (!context->mem_dump) {
+ /* Discard the dump */
+ sahara_send_reset(context);
+ return;
+ }
+
+ /* Populate the dump metadata and table for userspace */
+ dump_meta = context->mem_dump;
+ dump_meta->magic = SAHARA_DUMP_V1_MAGIC;
+ dump_meta->version = SAHARA_DUMP_V1_VER;
+ dump_meta->dump_size = dump_length;
+ dump_meta->table_size = context->dump_table_length;
+
+ image_out_table = context->mem_dump + sizeof(*dump_meta);
+ for (i = 0; i < table_nents; ++i) {
+ image_out_table[i].type = le64_to_cpu(dev_table[i].type);
+ image_out_table[i].address = le64_to_cpu(dev_table[i].address);
+ image_out_table[i].length = le64_to_cpu(dev_table[i].length);
+ strscpy(image_out_table[i].description, dev_table[i].description,
+ SAHARA_TABLE_ENTRY_STR_LEN);
+ strscpy(image_out_table[i].filename,
+ dev_table[i].filename,
+ SAHARA_TABLE_ENTRY_STR_LEN);
+ }
+
+ context->mem_dump_freespace = &image_out_table[i];
+
+ /* Done parsing the table, switch to image dump mode */
+ context->dump_table_length = 0;
+
+ /* Request the first chunk of the first image */
+ context->dump_image = &image_out_table[0];
+ dump_length = min(context->dump_image->length, SAHARA_READ_MAX_SIZE);
+ /* Avoid requesting EOI sized data so that we can identify errors */
+ if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
+ dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
+
+ context->dump_image_offset = dump_length;
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address = cpu_to_le64(context->dump_image->address);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length);
+
+ context->rx_size_requested = dump_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to send read for dump content %d\n", ret);
+}
+
+static void sahara_parse_dump_image(struct sahara_context *context)
+{
+ u64 dump_length;
+ int ret;
+
+ memcpy(context->mem_dump_freespace, context->rx, context->rx_size);
+ context->mem_dump_freespace += context->rx_size;
+
+ if (context->dump_image_offset >= context->dump_image->length) {
+ /* Need to move to next image */
+ context->dump_image++;
+ context->dump_images_left--;
+ context->dump_image_offset = 0;
+
+ if (!context->dump_images_left) {
+ /* Dump done */
+ dev_coredumpv(context->mhi_dev->mhi_cntrl->cntrl_dev,
+ context->mem_dump,
+ context->mem_dump_sz,
+ GFP_KERNEL);
+ context->mem_dump = NULL;
+ sahara_send_reset(context);
+ return;
+ }
+ }
+
+ /* Get next image chunk */
+ dump_length = context->dump_image->length - context->dump_image_offset;
+ dump_length = min(dump_length, SAHARA_READ_MAX_SIZE);
+ /* Avoid requesting EOI sized data so that we can identify errors */
+ if (dump_length == SAHARA_END_OF_IMAGE_LENGTH)
+ dump_length = SAHARA_END_OF_IMAGE_LENGTH / 2;
+
+ context->tx[0]->cmd = cpu_to_le32(SAHARA_MEM_READ64_CMD);
+ context->tx[0]->length = cpu_to_le32(SAHARA_MEM_READ64_LENGTH);
+ context->tx[0]->memory_read64.memory_address =
+ cpu_to_le64(context->dump_image->address + context->dump_image_offset);
+ context->tx[0]->memory_read64.memory_length = cpu_to_le64(dump_length);
+
+ context->dump_image_offset += dump_length;
+ context->rx_size_requested = dump_length;
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE, context->tx[0],
+ SAHARA_MEM_READ64_LENGTH, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev,
+ "Unable to send read for dump content %d\n", ret);
+}
+
+static void sahara_dump_processing(struct work_struct *work)
+{
+ struct sahara_context *context = container_of(work, struct sahara_context, dump_work);
+ int ret;
+
+ /*
+ * We should get the expected raw data, but if the device has an error
+ * it is supposed to send EOI with an error code.
+ */
+ if (context->rx_size != context->rx_size_requested &&
+ context->rx_size != SAHARA_END_OF_IMAGE_LENGTH) {
+ dev_err(&context->mhi_dev->dev,
+ "Unexpected response to read_data. Expected size: %#zx got: %#zx\n",
+ context->rx_size_requested,
+ context->rx_size);
+ goto error;
+ }
+
+ if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH &&
+ le32_to_cpu(context->rx->cmd) == SAHARA_END_OF_IMAGE_CMD) {
+ dev_err(&context->mhi_dev->dev,
+ "Unexpected EOI response to read_data. Status: %d\n",
+ le32_to_cpu(context->rx->end_of_image.status));
+ goto error;
+ }
+
+ if (context->rx_size == SAHARA_END_OF_IMAGE_LENGTH &&
+ le32_to_cpu(context->rx->cmd) != SAHARA_END_OF_IMAGE_CMD) {
+ dev_err(&context->mhi_dev->dev,
+ "Invalid EOI response to read_data. CMD: %d\n",
+ le32_to_cpu(context->rx->cmd));
+ goto error;
+ }
+
+ /*
+ * Need to know if we received the dump table, or part of a dump image.
+ * Since we get raw data, we cannot tell from the data itself. Instead,
+ * we use the stored dump_table_length, which we zero after we read and
+ * process the entire table.
+ */
+ if (context->dump_table_length)
+ sahara_parse_dump_table(context);
+ else
+ sahara_parse_dump_image(context);
+
+ ret = mhi_queue_buf(context->mhi_dev, DMA_FROM_DEVICE, context->rx,
+ SAHARA_PACKET_MAX_SIZE, MHI_EOT);
+ if (ret)
+ dev_err(&context->mhi_dev->dev, "Unable to requeue rx buf %d\n", ret);
+
+ return;
+
+error:
+ vfree(context->mem_dump);
+ context->mem_dump = NULL;
+ sahara_send_reset(context);
+}
+
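Worth noting for the error handling above: dev_coredumpv() takes ownership of the vmalloc'ed buffer and frees it itself once userspace has read the dump (or the devcoredump timeout expires). That is why the success path in sahara_parse_dump_image() only clears context->mem_dump, while the error path here still has to vfree() it. The ownership pattern, sketched with a hypothetical fill_dump() helper:

buf = vzalloc(size);
if (!buf)
	return;
fill_dump(buf, size);                      /* hypothetical: populate the dump */
dev_coredumpv(dev, buf, size, GFP_KERNEL); /* devcoredump now owns the buffer */
buf = NULL;                                /* must not be vfree()'d here */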
static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
struct sahara_context *context;
@@ -382,7 +746,8 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
}
context->mhi_dev = mhi_dev;
- INIT_WORK(&context->work, sahara_processing);
+ INIT_WORK(&context->fw_work, sahara_processing);
+ INIT_WORK(&context->dump_work, sahara_dump_processing);
context->image_table = aic100_image_table;
context->table_size = ARRAY_SIZE(aic100_image_table);
context->active_image_id = SAHARA_IMAGE_ID_NONE;
@@ -405,7 +770,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
{
struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
- cancel_work_sync(&context->work);
+ cancel_work_sync(&context->fw_work);
+ cancel_work_sync(&context->dump_work);
+	vfree(context->mem_dump);
sahara_release_image(context);
mhi_unprepare_from_transfer(mhi_dev);
}
@@ -418,8 +786,14 @@ static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result
{
struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
- if (!mhi_result->transaction_status)
- schedule_work(&context->work);
+ if (!mhi_result->transaction_status) {
+ context->rx_size = mhi_result->bytes_xferd;
+ if (context->is_mem_dump_mode)
+ schedule_work(&context->dump_work);
+ else
+ schedule_work(&context->fw_work);
+ }
}
static const struct mhi_device_id sahara_mhi_match_table[] = {