-rw-r--r--  Documentation/gpu/drm-kms-helpers.rst  36
-rw-r--r--  Documentation/gpu/drm-kms.rst  14
-rw-r--r--  Documentation/gpu/todo.rst  18
-rw-r--r--  arch/x86/kernel/early-quirks.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  111
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  184
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cikd.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vid.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile  4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm  1384
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  46
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  1267
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.h  42
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c  14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c  75
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c  31
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  112
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  9
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c  14
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c  17
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h  4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  48
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  167
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c  59
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pasid.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  79
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c  259
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c  78
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  1061
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h  33
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  52
-rw-r--r--  drivers/gpu/drm/amd/include/vi_structs.h  2
-rw-r--r--  drivers/gpu/drm/armada/armada_crtc.c  434
-rw-r--r--  drivers/gpu/drm/armada/armada_crtc.h  26
-rw-r--r--  drivers/gpu/drm/armada/armada_overlay.c  302
-rw-r--r--  drivers/gpu/drm/armada/armada_trace.h  24
-rw-r--r--  drivers/gpu/drm/drm_atomic.c  45
-rw-r--r--  drivers/gpu/drm/drm_edid.c  5
-rw-r--r--  drivers/gpu/drm/drm_fb_helper.c  191
-rw-r--r--  drivers/gpu/drm/drm_framebuffer.c  2
-rw-r--r--  drivers/gpu/drm/drm_syncobj.c  45
-rw-r--r--  drivers/gpu/drm/etnaviv/Kconfig  9
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_buffer.c  40
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c  1
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c  29
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h  18
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_drv.c  22
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_drv.h  14
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_dump.c  23
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gem.c  197
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gem.h  22
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c  7
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  199
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gpu.c  215
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_gpu.h  11
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_iommu.c  5
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c  2
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_mmu.c  14
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_perfmon.c  4
-rw-r--r--  drivers/gpu/drm/etnaviv/etnaviv_perfmon.h  2
-rw-r--r--  drivers/gpu/drm/exynos/Kconfig  11
-rw-r--r--  drivers/gpu/drm/exynos/Makefile  1
-rw-r--r--  drivers/gpu/drm/exynos/exynos5433_drm_decon.c  8
-rw-r--r--  drivers/gpu/drm/exynos/exynos7_drm_decon.c  2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.c  12
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.h  2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.c  1806
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.h  252
-rw-r--r--  drivers/gpu/drm/exynos/regs-decon5433.h (renamed from include/video/exynos5433_decon.h)  6
-rw-r--r--  drivers/gpu/drm/exynos/regs-decon7.h (renamed from include/video/exynos7_decon.h)  8
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug  2
-rw-r--r--  drivers/gpu/drm/i915/gvt/cmd_parser.c  39
-rw-r--r--  drivers/gpu/drm/i915/gvt/display.c  81
-rw-r--r--  drivers/gpu/drm/i915/gvt/edid.c  22
-rw-r--r--  drivers/gpu/drm/i915/gvt/fb_decoder.c  30
-rw-r--r--  drivers/gpu/drm/i915/gvt/gtt.c  37
-rw-r--r--  drivers/gpu/drm/i915/gvt/gtt.h  3
-rw-r--r--  drivers/gpu/drm/i915/gvt/gvt.c  1
-rw-r--r--  drivers/gpu/drm/i915/gvt/gvt.h  33
-rw-r--r--  drivers/gpu/drm/i915/gvt/handlers.c  750
-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c  4
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio.c  57
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio.h  7
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio_context.c  238
-rw-r--r--  drivers/gpu/drm/i915/gvt/trace.h  15
-rw-r--r--  drivers/gpu/drm/i915/gvt/vgpu.c  24
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c  116
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c  46
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  503
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  28
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_internal.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c  31
-rw-r--r--  drivers/gpu/drm/i915/i915_memcpy.c  7
-rw-r--r--  drivers/gpu/drm/i915/i915_params.c  33
-rw-r--r--  drivers/gpu/drm/i915/i915_params.h  4
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  10
-rw-r--r--  drivers/gpu/drm/i915/i915_trace.h  40
-rw-r--r--  drivers/gpu/drm/i915/i915_utils.h  15
-rw-r--r--  drivers/gpu/drm/i915/intel_ddi.c  6
-rw-r--r--  drivers/gpu/drm/i915/intel_device_info.c  85
-rw-r--r--  drivers/gpu/drm/i915/intel_device_info.h  183
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c  203
-rw-r--r--  drivers/gpu/drm/i915/intel_display.h  321
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c  55
-rw-r--r--  drivers/gpu/drm/i915/intel_hangcheck.c  10
-rw-r--r--  drivers/gpu/drm/i915/intel_i2c.c  61
-rw-r--r--  drivers/gpu/drm/i915/intel_lpe_audio.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c  17
-rw-r--r--  drivers/gpu/drm/i915/intel_opregion.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_opregion.h  106
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c  5
-rw-r--r--  drivers/gpu/drm/i915/intel_psr.c  16
-rw-r--r--  drivers/gpu/drm/i915/intel_runtime_pm.c  3
-rw-r--r--  drivers/gpu/drm/i915/selftests/huge_pages.c  2
-rw-r--r--  drivers/gpu/drm/i915/selftests/intel_hangcheck.c  315
-rw-r--r--  drivers/gpu/drm/imx/imx-drm-core.c  1
-rw-r--r--  drivers/gpu/drm/imx/ipuv3-plane.c  102
-rw-r--r--  drivers/gpu/drm/stm/dw_mipi_dsi-stm.c  5
-rw-r--r--  drivers/gpu/drm/stm/ltdc.c  6
-rw-r--r--  drivers/gpu/ipu-v3/Kconfig  4
-rw-r--r--  drivers/gpu/ipu-v3/ipu-cpmem.c  1
-rw-r--r--  drivers/gpu/ipu-v3/ipu-ic.c  1
-rw-r--r--  drivers/gpu/ipu-v3/ipu-pre.c  29
-rw-r--r--  drivers/gpu/ipu-v3/ipu-prg.c  84
-rw-r--r--  drivers/gpu/ipu-v3/ipu-prv.h  4
-rw-r--r--  include/drm/drm_atomic.h  32
-rw-r--r--  include/drm/drm_fb_helper.h  38
-rw-r--r--  include/drm/drm_framebuffer.h  6
-rw-r--r--  include/drm/drm_mode_config.h  9
-rw-r--r--  include/drm/drm_print.h  2
-rw-r--r--  include/drm/drm_syncobj.h  34
-rw-r--r--  include/drm/i915_pciids.h  32
-rw-r--r--  include/uapi/drm/exynos_drm.h  192
-rw-r--r--  include/uapi/linux/kfd_ioctl.h  15
-rw-r--r--  include/video/imx-ipu-v3.h  2
145 files changed, 8120 insertions, 5400 deletions
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index 3ea622876b67..e37557b30f62 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -74,15 +74,6 @@ Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
:export:
-Legacy CRTC/Modeset Helper Functions Reference
-==============================================
-
-.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
- :doc: overview
-
-.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
- :export:
-
Simple KMS Helper Reference
===========================
@@ -282,15 +273,6 @@ Flip-work Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_flip_work.c
:export:
-Plane Helper Reference
-======================
-
-.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
- :doc: overview
-
-.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
- :export:
-
Auxiliary Modeset Helpers
=========================
@@ -308,3 +290,21 @@ Framebuffer GEM Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
:export:
+
+Legacy Plane Helper Reference
+=============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+ :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+ :export:
+
+Legacy CRTC/Modeset Helper Functions Reference
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+ :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+ :export:
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 307284125d7a..2dcf5b42015d 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -263,14 +263,20 @@ Taken all together there's two consequences for the atomic design:
- An atomic update is assembled and validated as an entirely free-standing pile
of structures within the :c:type:`drm_atomic_state <drm_atomic_state>`
- container. Again drivers can subclass that container for their own state
- structure tracking needs. Only when a state is committed is it applied to the
- driver and modeset objects. This way rolling back an update boils down to
- releasing memory and unreferencing objects like framebuffers.
+ container. Driver private state structures are also tracked in the same
+ structure; see the next chapter. Only when a state is committed is it applied
+ to the driver and modeset objects. This way rolling back an update boils down
+ to releasing memory and unreferencing objects like framebuffers.
Read on in this chapter, and also in :ref:`drm_atomic_helper` for more detailed
coverage of specific topics.
+Handling Driver Private State
+-----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+ :doc: handling driver private state
+
Atomic Mode Setting Function Reference
--------------------------------------
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index f421a54527d2..1e593370f64f 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -194,6 +194,24 @@ drm_mode_config_helper_suspend/resume().
Contact: Maintainer of the driver you plan to convert
+Convert drivers to use drm_fb_helper_fbdev_setup/teardown()
+-----------------------------------------------------------
+
+Most drivers can use drm_fb_helper_fbdev_setup() except maybe:
+
+- amdgpu which has special logic to decide whether to call
+ drm_helper_disable_unused_functions()
+
+- armada which isn't atomic and doesn't call
+ drm_helper_disable_unused_functions()
+
+- i915 which calls drm_fb_helper_initial_config() in a worker
+
+Drivers that use drm_framebuffer_remove() to clean up the fbdev framebuffer can
+probably use drm_fb_helper_fbdev_teardown().
+
+Contact: Maintainer of the driver you plan to convert
+
Core refactorings
=================
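Regarding the new fbdev setup/teardown todo entry above: for orientation, a minimal sketch of what such a conversion can look like. This is not from the patch; the my_* names are illustrative placeholders, and it assumes the drm_fb_helper_fbdev_setup()/drm_fb_helper_fbdev_teardown() signatures introduced around this time.

#include <drm/drm_fb_helper.h>

/* Hedged sketch only; a real driver supplies .fb_probe in its funcs. */
static const struct drm_fb_helper_funcs my_fb_helper_funcs;
static struct drm_fb_helper my_fb_helper;

static int my_driver_fbdev_init(struct drm_device *dev)
{
	/* preferred_bpp = 32, max_conn_count = 4; pick per driver */
	return drm_fb_helper_fbdev_setup(dev, &my_fb_helper,
					 &my_fb_helper_funcs, 32, 4);
}

static void my_driver_fbdev_fini(struct drm_device *dev)
{
	drm_fb_helper_fbdev_teardown(dev);
}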
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3cbb2c78a9df..bae0d32e327b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -528,6 +528,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
+ INTEL_CFL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
INTEL_CNL_IDS(&gen9_early_ops),
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 642bea2c9b3a..d5a2eefd6c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -959,6 +959,7 @@ struct amdgpu_gfx_config {
};
struct amdgpu_cu_info {
+ uint32_t simd_per_cu;
uint32_t max_waves_per_simd;
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 896b16db58aa..335e454e2ee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -275,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
kfree(mem);
}
-uint64_t get_vmem_size(struct kgd_dev *kgd)
+void get_local_mem_info(struct kgd_dev *kgd,
+ struct kfd_local_mem_info *mem_info)
{
- struct amdgpu_device *adev =
- (struct amdgpu_device *)kgd;
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
+ ~((1ULL << 32) - 1);
+ resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+
+ memset(mem_info, 0, sizeof(*mem_info));
+ if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
+ mem_info->local_mem_size_public = adev->mc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->mc.real_vram_size -
+ adev->mc.visible_vram_size;
+ } else {
+ mem_info->local_mem_size_public = 0;
+ mem_info->local_mem_size_private = adev->mc.real_vram_size;
+ }
+ mem_info->vram_width = adev->mc.vram_width;
- BUG_ON(kgd == NULL);
+ pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n",
+ adev->mc.aper_base, aper_limit,
+ mem_info->local_mem_size_public,
+ mem_info->local_mem_size_private);
- return adev->mc.real_vram_size;
+ if (amdgpu_sriov_vf(adev))
+ mem_info->mem_clk_max = adev->clock.default_mclk / 100;
+ else
+ mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -298,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- /* The sclk is in quantas of 10kHz */
- return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+ /* the sclk is in quantas of 10kHz */
+ if (amdgpu_sriov_vf(adev))
+ return adev->clock.default_sclk / 100;
+
+ return amdgpu_dpm_get_sclk(adev, false) / 100;
+}
+
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+ if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+ return;
+
+ cu_info->cu_active_number = acu_info.number;
+ cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+ memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+ sizeof(acu_info.bitmap));
+ cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+ cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+ cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+ cu_info->simd_per_cu = acu_info.simd_per_cu;
+ cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+ cu_info->wave_front_size = acu_info.wave_front_size;
+ cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+ cu_info->lds_size = acu_info.lds_size;
+}
+
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
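To make the aperture visibility check in get_local_mem_info() above concrete, here is a small worked example; the 40-bit DMA mask and the BAR addresses are hypothetical, not taken from any device.

#include <stdbool.h>
#include <stdint.h>

int main(void)
{
	/* With a 40-bit DMA mask, address_mask selects bits 40..63. */
	uint64_t dma_mask     = (1ULL << 40) - 1;
	uint64_t address_mask = ~dma_mask;
	/* Hypothetical 256 MB BAR sitting well below the 1 TiB boundary. */
	uint64_t aper_base    = 0xE0000000ULL;
	uint64_t aper_limit   = aper_base + (256ULL << 20);
	/* Same predicate as the driver: if neither base nor limit has a
	 * bit set above the DMA mask, the CPU-visible VRAM is reported
	 * as "public" local memory; otherwise all VRAM is "private". */
	bool visible = !((aper_base & address_mask) ||
			 (aper_limit & address_mask));

	return visible ? 0 : 1;	/* visible == true for these values */
}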
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8d689ab7e429..2a519f9062ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-uint64_t get_vmem_size(struct kgd_dev *kgd);
+void get_local_mem_info(struct kgd_dev *kgd,
+ struct kfd_local_mem_info *mem_info);
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1ae149456c9f..a9e6aea0e5f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
@@ -166,7 +173,7 @@ static int get_tile_config(struct kgd_dev *kgd,
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
- .get_vmem_size = get_vmem_size,
+ .get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
@@ -177,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
.hqd_is_occupied = kgd_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
@@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
@@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
- m->sdma_rlc_doorbell);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+ data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdma_rlc_rb_rptr);
+
WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdma_rlc_virtual_addr);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
m->sdma_rlc_rb_rptr_addr_lo);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdma_rlc_rb_rptr_addr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- m->sdma_rlc_rb_cntl);
+
+ data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
return 0;
}
@@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
struct cik_sdma_rlc_registers *m;
uint32_t sdma_base_addr;
uint32_t temp;
- int timeout = utimeout;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
@@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
break;
- if (timeout <= 0)
+ if (time_after(jiffies, end_jiffies))
return -ETIME;
- msleep(20);
- timeout -= 20;
+ usleep_range(500, 1000);
}
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+ m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+
return 0;
}
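Both SDMA destroy paths in this series replace the decrementing msleep() countdown with the kernel's standard jiffies-based timeout loop. A self-contained sketch of that pattern follows; the done() predicate is a hypothetical stand-in for the RLC context-status read.

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/types.h>

/* Illustrative helper: poll done() for up to timeout_ms milliseconds,
 * mirroring the loop now used in kgd_hqd_sdma_destroy(). */
static int poll_until_done(bool (*done)(void), unsigned int timeout_ms)
{
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout_ms);

	while (!done()) {
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);	/* sleep, don't busy-wait */
	}
	return 0;
}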
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index e9b436bc8dcb..b127259d7d85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
/*
* Register access functions
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,7 +132,7 @@ static int get_tile_config(struct kgd_dev *kgd,
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
- .get_vmem_size = get_vmem_size,
+ .get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
@@ -136,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
.hqd_is_occupied = kgd_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
- return 0;
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+ pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+ return retval;
}
static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
return (struct vi_mqd *)mqd;
}
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
- return (struct cik_sdma_rlc_registers *)mqd;
+ return (struct vi_sdma_mqd *)mqd;
}
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct vi_sdma_mqd *m;
+ unsigned long end_jiffies;
+ uint32_t sdma_base_addr;
+ uint32_t data;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ if (m->sdma_engine_id) {
+ data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+ data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+ } else {
+ data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+ }
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdmax_rlcx_virtual_addr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
return 0;
}
@@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct cik_sdma_rlc_registers *m;
+ struct vi_sdma_mqd *m;
uint32_t sdma_base_addr;
uint32_t sdma_rlc_rb_cntl;
@@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct cik_sdma_rlc_registers *m;
+ struct vi_sdma_mqd *m;
uint32_t sdma_base_addr;
uint32_t temp;
- int timeout = utimeout;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
while (true) {
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
- if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (timeout <= 0)
+ if (time_after(jiffies, end_jiffies))
return -ETIME;
- msleep(20);
- timeout -= 20;
+ usleep_range(500, 1000);
}
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
return 0;
}
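The new hqd_dump()/hqd_sdma_dump() callbacks hand back a kmalloc()ed array of {register offset, value} pairs plus a count, with ownership passing to the caller (presumably the new amdkfd debugfs support; see kfd_debugfs.c in the diffstat). A rough sketch of a consumer; print_hqd_dump() is illustrative, not from the patch.

#include <linux/printk.h>
#include <linux/slab.h>
#include "kgd_kfd_interface.h"	/* struct kfd2kgd_calls, struct kgd_dev */

static void print_hqd_dump(struct kgd_dev *kgd,
			   const struct kfd2kgd_calls *f)
{
	uint32_t (*dump)[2];
	uint32_t n_regs, i;

	if (f->hqd_dump(kgd, 0 /* pipe */, 0 /* queue */, &dump, &n_regs))
		return;
	for (i = 0; i < n_regs; i++)
		pr_info("  %08x: %08x\n", dump[i][0], dump[i][1]);
	kfree(dump);	/* the callback allocated it; the caller frees it */
}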
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 6a9e38a3d2a0..cee6e8a3ad9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -562,7 +562,7 @@
#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
#define SHARED_BASE(x) ((x) << 16) /* LDS */
-#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
+#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
enum {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 4b5109bfd5f4..a066c5eda135 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -48,6 +48,8 @@
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
+#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */
+
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048
@@ -5277,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+ cu_info->max_waves_per_simd = 10;
+ cu_info->max_scratch_slots_per_cu = 32;
+ cu_info->wave_front_size = 64;
+ cu_info->lds_size = 64;
}
const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ff9f1a82630f..4e694ae9f308 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7116,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+ cu_info->max_waves_per_simd = 10;
+ cu_info->max_scratch_slots_per_cu = 32;
+ cu_info->wave_front_size = 64;
+ cu_info->lds_size = 64;
}
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
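As a quick sanity check on the CU parameters hard-coded in the two hunks above: 4 SIMDs per CU at 10 waves per SIMD gives 40 waves in flight per CU, i.e. 40 * 64 = 2560 work-items at a wavefront size of 64. A trivial restatement in C, with the constants copied from the code above:

/* Derived occupancy figures for GFX7/GFX8 from the values set in
 * gfx_v7_0_get_cu_info()/gfx_v8_0_get_cu_info(). */
#define NUM_SIMD_PER_CU		4
#define MAX_WAVES_PER_SIMD	10
#define WAVE_FRONT_SIZE		64

static inline unsigned int max_waves_per_cu(void)
{
	return NUM_SIMD_PER_CU * MAX_WAVES_PER_SIMD;	/* = 40 */
}

static inline unsigned int max_work_items_per_cu(void)
{
	return max_waves_per_cu() * WAVE_FRONT_SIZE;	/* = 2560 */
}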
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index dbf3703cbd1b..19ddd2312e00 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -27,6 +27,8 @@
#define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
#define SDMA_MAX_INSTANCE 2
+#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */
+
/* crtc instance offsets */
#define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c)
#define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c)
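The new constant combines with SDMA1_REGISTER_OFFSET exactly as in get_sdma_base_addr() in amdgpu_amdkfd_gfx_v8.c above; a minimal sketch of the per-queue register-base arithmetic (the helper name is illustrative):

#include <linux/types.h>
#include "vid.h"	/* SDMA1_REGISTER_OFFSET, KFD_VI_SDMA_QUEUE_OFFSET */

/* Each SDMA engine's register block is SDMA1_REGISTER_OFFSET (0x200)
 * apart, and each RLC queue within an engine is
 * KFD_VI_SDMA_QUEUE_OFFSET (0x80) registers apart. */
static inline uint32_t vi_sdma_queue_reg_base(uint32_t engine_id,
					      uint32_t queue_id)
{
	return engine_id * SDMA1_REGISTER_OFFSET +
	       queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
}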
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 342c2d937b17..a317e76ffb5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -35,6 +35,8 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_dbgdev.o kfd_dbgmgr.o
+ kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+
+amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
new file mode 100644
index 000000000000..997a383dcb8b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -0,0 +1,1384 @@
+/*
+ * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if 0
+HW (VI) source code for CWSR trap handler
+#Version 18 + multiple trap handler
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revision #18 --...
+/* Rev History
+** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(merged, skipped - already fixed by PV)
+** #4. SR Memory Layout:
+** 1. VGPR-SGPR-HWREG-{LDS}
+** 2. tba_hi.bits.26 - reconfigured as the first-wave-in-tg bit, to defer the LDS save for a threadgroup.. performance concern..
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+** 2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
+** 2. optimize s_buffer save by burst 16sgprs...
+** #10. Update 1. Optimize restore sgpr by burst 16 sgprs.
+** #11. Update 1. Add 2 more timestamps for debug version
+** #12. Update 1. Add VGPR SR using DWx4, some cases improve and some cases drop performance
+** #13. Integ 1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot.
+** #16. Update 1. PERF - UNROLL LDS_DMA got a 2500-cycle save in IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
+** 2. PERF - Save LDS before save VGPR to cover LDS save long latency...
+** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
+** 2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+
+// Keep definition the same as the app shader; these 2 timestamps are part of the app shader... Should be before any save and after restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s = s[34:35] // save start
+var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi
+var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ
+var s_g8sr_ts_save_d = s[40:41] // save end
+var s_g8sr_ts_restore_s = s[42:43] // restore start
+var s_g8sr_ts_restore_d = s[44:45] // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/* control on how to run the shader */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
+var EMU_RUN_HACK = 0
+var EMU_RUN_HACK_RESTORE_NORMAL = 0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS = 1
+var WG_BASE_ADDR_LO = 0x9000a000
+var WG_BASE_ADDR_HI = 0x0
+var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL = 0x0
+var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
+var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
+var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+
+/**************************************************************************/
+/* variables */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_IB_STS_RCNT_SIZE = 4 //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
+
+
+/* Save */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT = 27
+var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+
+ //tba_lo and tba_hi need to be saved/restored
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_status = ttmp4
+var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo = ttmp6
+var s_save_xnack_mask_hi = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+
+var s_save_mem_offset = tma_lo
+var s_save_alloc_size = s_save_trapsts //conflict
+var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time)
+var s_save_m0 = tma_hi
+
+/* Restore */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
+var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+
+var s_restore_mem_offset = ttmp2
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored
+var s_restore_mem_offset_save = s_restore_tmp //no conflict
+
+var s_restore_m0 = s_restore_alloc_size //no conflict
+
+var s_restore_mode = ttmp7
+
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = tma_lo //no conflict
+var s_restore_exec_hi = tma_hi //no conflict
+var s_restore_status = ttmp4
+var s_restore_trapsts = ttmp5
+var s_restore_xnack_mask_lo = xnack_mask_lo
+var s_restore_xnack_mask_hi = xnack_mask_hi
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+
+/**************************************************************************/
+/* trap handler entry points */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+ asic(VI)
+ type(CS)
+
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
+ //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
+ s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+ s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
+ //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
+ s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
+ else
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+ end
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE //restore
+
+L_SKIP_RESTORE:
+
+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_cbranch_scc1 L_SAVE //this is the operation for save
+
+ // ********* Handle non-CWSR traps *******************
+if (!EMU_RUN_HACK)
+ /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
+ s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0
+ s_waitcnt lgkmcnt(0)
+ s_or_b32 ttmp7, ttmp8, ttmp9
+ s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler has not been set
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+ s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler
+
+L_NO_NEXT_TRAP:
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+ s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
+ s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
+ s_addc_u32 ttmp1, ttmp1, 0
+L_EXCP_CASE:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC)
+ s_rfe_b64 [ttmp0, ttmp1]
+end
+ // ********* End handling of non-CWSR traps *******************
+
+/**************************************************************************/
+/* save routine */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+end
+
+ //check whether there is mem_viol
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+ s_cbranch_scc0 L_NO_PC_REWIND
+
+ //if so, need to rewind the PC assuming the GDS operation gets NACKed
+ s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
+ s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
+
+L_NO_PC_REWIND:
+ s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK
+ s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi //save XNACK_MASK; must be done before any memory operation
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_sq_save_msg
+ s_waitcnt lgkmcnt(0)
+end
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+ end
+
+ L_SLEEP:
+ s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7th/8th wave could not get arbitration to execute instructions, while other waves are stuck in the sleep-loop waiting for wrexec!=0
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_cbranch_execz L_SLEEP
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_spi_wrexec
+ s_waitcnt lgkmcnt(0)
+end
+
+ /* set up Resource Constants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_save_tmp, v9, 0
+ s_lshr_b32 s_save_tmp, s_save_tmp, 6
+ s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+
+
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
+
+ //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
+ s_mov_b32 s_save_m0, m0 //save M0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
+
+
+
+
+ /* save HW registers */
+ //////////////////////////////
+
+ L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
+
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO
+ s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI
+ end
+
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+ //s_save_trapsts conflicts with s_save_alloc_size
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS
+
+ write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO
+ write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI
+
+ //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO
+ write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI
+
+
+
+ /* the first wave in the threadgroup */
+ // save first_wave bit in tba_hi unused bit.26
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract first wave bit
+ //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26]
+ s_mov_b32 s_save_exec_hi, 0x0
+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //sgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete?
+ // restore s_save_buf_rsrc0,1
+ //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+ s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+ /* save first 4 VGPRs, so the LDS save can use them */
+ // each wave will alloc at least 4 vgprs...
+ /////////////////////////////////////////////////////////////////////////////////////
+
+ s_mov_b32 s_save_mem_offset, 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+end
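+	// note: each buffer_store_dword writes 4 bytes per lane for all 64
+	// lanes (256 bytes per VGPR), which is why the stores above step by offset:256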
+
+
+
+ /* save LDS */
+ //////////////////////////////
+
+ L_SAVE_LDS:
+
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+	s_cbranch_scc0	L_SAVE_LDS_DONE	//no lds used? jump to L_SAVE_LDS_DONE
+
+ s_barrier //LDS is used? wait for other waves in the same TG
+ //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+	// the first wave does the LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
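+	//    e.g. an encoded lds_size of 2 gives 2*64 = 128 DW = 512 bytes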
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
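+	// resulting save-area layout (byte offsets): [0: VGPRs][+size(VGPR): SGPRs][+size(SGPR): HWREGs][+size(HWREG): LDS]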
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+ s_mov_b32 s3, 256*2
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ L_SAVE_LDS_LOOP:
+		//TODO: it looks like the 2-instruction buffer_store/load clauses for save/restore hurt performance???
+		if (SAVE_LDS)	//SPI always allocates LDS space in 128DW granularity
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+ end
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1	// UNROLL, causes icache misses
+	// store from the highest LDS address to the lowest
+ s_mov_b32 s3, 256*2
+ s_sub_u32 m0, s_save_alloc_size, s3
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+	s_lshr_b32	s_save_alloc_size, s_save_alloc_size, 9	// how many 128-DW (512-byte) chunks...
+	s_sub_u32	s_save_alloc_size, 128, s_save_alloc_size	// store from the highest addr to the lowest
+	s_mul_i32	s_save_alloc_size, s_save_alloc_size, 6*4	// PC offset increment, each LDS save block costs 6*4 bytes of instructions
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, 3*4	//3*4 bytes to skip the 3 instructions below (s_add, s_addc and s_setpc)
+ s_nop 0
+ s_nop 0
+ s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
+ s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
+ s_add_u32 s0, s0,s_save_alloc_size
+ s_addc_u32 s1, s1, 0
+ s_setpc_b64 s[0:1]
+
+
+	for var i = 0; i < 128; i++
+		// be careful to make this a 64-byte-aligned address, which could improve performance...
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+
+ if i!=127
+			s_sub_u32	m0, m0, s3	// use an SGPR to shrink the 2-DW instruction to 1 DW, i.e. pack more LDS_DMA instructions into one cache line
+ s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
+ end
+ end
+
+else // BUFFER_STORE
+ v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+ v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
+ v_mul_i32_i24 v2, v3, 8 // tid*8
+ v_mov_b32 v3, 256*2
+ s_mov_b32 m0, 0x10000
+ s_mov_b32 s0, s_save_buf_rsrc3
+ s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT
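+	// each lane copies one 8-byte LDS slot per pass: v2 holds the lane's
+	// LDS byte address (tid*8) and v3 advances it by 64 lanes * 8 bytes = 512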
+
+L_SAVE_LDS_LOOP_VECTOR:
+	ds_read_b64 v[0:1], v2	// read 2 DW of LDS at byte address v2
+ s_waitcnt lgkmcnt(0)
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+// s_waitcnt vmcnt(0)
+ v_add_u32 v2, vcc[0:1], v2, v3
+ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+ s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+ // restore rsrc3
+ s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+	/* save VGPRs - save the remaining VGPRs */
+ //////////////////////////////////////////////////////////////////////////////////////
+ L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+	s_mov_b32	s_save_mem_offset, (0+256*4)	// for the remaining VGPRs (the first 4 are already saved)
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	//vgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, 4 // skip first 4 VGPRs
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
+
+ s_set_gpr_idx_on m0, 0x1 // This will change M0
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, 0x1000	// because the instruction above changes m0
+L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 // v0 = v[0+m0]
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 m0, m0, 4
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ // VGPR store using dw burst
+	s_mov_b32	m0, 0x4	//VGPR initial index value = 4 (the first 4 VGPRs were saved earlier)
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+
+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
+
+ L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 //v0 = v[0+m0]
+	v_mov_b32	v1, v1	//v1 = v[1+m0]
+	v_mov_b32	v2, v2	//v2 = v[2+m0]
+	v_mov_b32	v3, v3	//v3 = v[3+m0]
+
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ end
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+end
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+ /* S_PGM_END_SAVED */ //FIXME graphics ONLY
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ s_rfe_b64 s_save_pc_lo //Return to the main shader program
+ else
+ end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_d
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // Need reset rsrc2??
+ s_mov_b32 m0, s_save_mem_offset
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+ s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+	/* Setup Resource Constants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_restore_tmp, v9, 0
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+ s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+ s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+ else
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+	// tma_lo/hi are SGPRs 110 and 111, which are not used when 112 SGPRs are allocated...
+ s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+	s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1]	//back up ts to ttmp0/1, since exec will be restored at the end..
+end
+
+
+
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+ /* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+ //////////////////////////////
+ L_RESTORE_LDS:
+
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+ L_RESTORE_LDS_LOOP:
+ if (SAVE_LDS)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
+ end
+ s_add_u32 m0, m0, 256*2 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
+
+
+ /* restore VGPRs */
+ //////////////////////////////
+ L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	//vgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+if G8SR_VGPR_SR_IN_DWX4
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, s_restore_alloc_size
+ s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
+
+L_RESTORE_VGPR_LOOP:
+ buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ s_waitcnt vmcnt(0)
+ s_sub_u32 m0, m0, 4
+ v_mov_b32 v0, v0 // v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_cmp_eq_u32 m0, 0x8000
+ s_cbranch_scc0 L_RESTORE_VGPR_LOOP
+ s_set_gpr_idx_off
+
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE	// reset const stride to 4 bytes
+
+else
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+	s_mov_b32	m0, 4	//VGPR initial index value = 4
+ s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
+
+ L_RESTORE_VGPR_LOOP:
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+ end
+ s_waitcnt vmcnt(0) //ensure data ready
+ v_mov_b32 v0, v0 //v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
+ s_set_gpr_idx_off
+ /* VGPR restore on v0 */
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ end
+
+end
+
+ /* restore SGPRs */
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group
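+	// read_16sgpr_from_mem decrements the offset by 64 bytes after each
+	// read, so the groups come back in descending order for s_movreld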
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)	//sgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+	/* If 112 SGPRs are allocated, 4 of them are not used: TBA(108,109) and TMA(110,111).
+	   However, it is safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
+	*/
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+	s_cmp_eq_u32	m0, 0	//scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete?
+
+ /* restore HW registers */
+ //////////////////////////////
+ L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+ s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+ s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS
+ read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS
+ read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
+ read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
+ read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO
+ read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI
+
+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+
+ //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+ s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
+ //reuse s_restore_m0 as a temp register
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+ s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+
+ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_d
+ s_waitcnt lgkmcnt(0)
+end
+
+// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+ s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/* the END */
+/**************************************************************************/
+L_END_PGM:
+ s_endpgm
+
+end
+
+
+/**************************************************************************/
+/* the helper functions */
+/**************************************************************************/
+
+//Only used to save hwregs to memory
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+ s_buffer_store_dword s, s_rsrc, m0 glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+end
+
+
+// HWREGs are saved before SGPRs, so all HWREG registers can be used.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+end
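+// note: this advances the rsrc base address by 64 bytes instead of the
+// soffset, which is why the SGPR save loop backs up rsrc0/1 beforehand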
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+ // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+ s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size
+	s_lshl_b32	s_lds_size_byte, s_lds_size_byte, 8	//LDS size in bytes = lds_size * 64 DW * 4 bytes	// granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+	s_getreg_b32	s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	//vgpr_size
+	s_add_u32	s_vgpr_size_byte, s_vgpr_size_byte, 1
+	s_lshl_b32	s_vgpr_size_byte, s_vgpr_size_byte, (2+8)	//size in bytes = (vgpr_size + 1) * 4 VGPRs * 64 threads * 4 bytes (non-zero value)	//FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+	s_getreg_b32	s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)	//sgpr_size
+	s_add_u32	s_sgpr_size_byte, s_sgpr_size_byte, 1
+	s_lshl_b32	s_sgpr_size_byte, s_sgpr_size_byte, 6	//size in bytes = (sgpr_size + 1) * 16 SGPRs * 4 bytes (non-zero value)
+end
+
+function get_hwreg_size_bytes
+ return 128 //HWREG size 128 bytes
+end
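+// note: only 12 hwreg dwords (48 bytes) are actually read/written by the
+// save and restore paths; the rest of the 128-byte region is unused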
+
+
+#endif
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+ 0xbf820001, 0xbf820123,
+ 0xb8f4f802, 0x89748674,
+ 0xb8f5f803, 0x8675ff75,
+ 0x00000400, 0xbf850011,
+ 0xc00a1e37, 0x00000000,
+ 0xbf8c007f, 0x87777978,
+ 0xbf840002, 0xb974f802,
+ 0xbe801d78, 0xb8f5f803,
+ 0x8675ff75, 0x000001ff,
+ 0xbf850002, 0x80708470,
+ 0x82718071, 0x8671ff71,
+ 0x0000ffff, 0xb974f802,
+ 0xbe801f70, 0xb8f5f803,
+ 0x8675ff75, 0x00000100,
+ 0xbf840006, 0xbefa0080,
+ 0xb97a0203, 0x8671ff71,
+ 0x0000ffff, 0x80f08870,
+ 0x82f18071, 0xbefa0080,
+ 0xb97a0283, 0xbef60068,
+ 0xbef70069, 0xb8fa1c07,
+ 0x8e7a9c7a, 0x87717a71,
+ 0xb8fa03c7, 0x8e7a9b7a,
+ 0x87717a71, 0xb8faf807,
+ 0x867aff7a, 0x00007fff,
+ 0xb97af807, 0xbef2007e,
+ 0xbef3007f, 0xbefe0180,
+ 0xbf900004, 0xbf8e0002,
+ 0xbf88fffe, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x867aff7f,
+ 0x08000000, 0x8f7a837a,
+ 0x877b7a7b, 0x867aff7f,
+ 0x70000000, 0x8f7a817a,
+ 0x877b7a7b, 0xbeef007c,
+ 0xbeee0080, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cbc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611d3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xb8f5f803,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611d7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dbc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xb8eff801, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbef30080,
+ 0x8773737a, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8f51605, 0x80758175,
+ 0x8e758475, 0x8e7a8275,
+ 0xbefa00ff, 0x01000000,
+ 0xbef60178, 0x80786e78,
+ 0x82798079, 0xbefc0080,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003c, 0x00000000,
+ 0xc06b013c, 0x00000010,
+ 0xc06b023c, 0x00000020,
+ 0xc06b033c, 0x00000030,
+ 0x8078c078, 0x82798079,
+ 0x807c907c, 0xbf0a757c,
+ 0xbf85ffeb, 0xbef80176,
+ 0xbeee0080, 0xbefe00c1,
+ 0xbeff00c1, 0xbefa00ff,
+ 0x01000000, 0xe0724000,
+ 0x6e1e0000, 0xe0724100,
+ 0x6e1e0100, 0xe0724200,
+ 0x6e1e0200, 0xe0724300,
+ 0x6e1e0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f54306,
+ 0x8675c175, 0xbf84002c,
+ 0xbf8a0000, 0x867aff73,
+ 0x04000000, 0xbf840028,
+ 0x8e758675, 0x8e758275,
+ 0xbefa0075, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0x806eff6e, 0x00000080,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe80007b,
+ 0x867bff7b, 0xff7fffff,
+ 0x877bff7b, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8c007f, 0xe0765000,
+ 0x6e1e0002, 0x32040702,
+ 0xd0c9006a, 0x0000eb02,
+ 0xbf87fff7, 0xbefb0000,
+ 0xbeee00ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f52a05, 0x80758175,
+ 0x8e758275, 0x8e7a8875,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0084, 0xbf0a757c,
+ 0xbf840015, 0xbf11017c,
+ 0x8075ff75, 0x00001000,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x6e1e0000,
+ 0xe0724100, 0x6e1e0100,
+ 0xe0724200, 0x6e1e0200,
+ 0xe0724300, 0x6e1e0300,
+ 0x807c847c, 0x806eff6e,
+ 0x00000400, 0xbf0a757c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200ca, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x8676ff7f,
+ 0x08000000, 0x8f768376,
+ 0x877b767b, 0x8676ff7f,
+ 0x70000000, 0x8f768176,
+ 0x877b767b, 0x8676ff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f34306, 0x8673c173,
+ 0xbf840019, 0x8e738673,
+ 0x8e738273, 0xbefa0073,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x8072ff72,
+ 0x00000080, 0xbefa00ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x721e0000,
+ 0xe0510100, 0x721e0000,
+ 0x807cff7c, 0x00000200,
+ 0x8072ff72, 0x00000200,
+ 0xbf0a737c, 0xbf85fff6,
+ 0xbef20080, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f32a05,
+ 0x80738173, 0x8e738273,
+ 0x8e7a8873, 0xbefa00ff,
+ 0x01000000, 0xbef60072,
+ 0x8072ff72, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0x8073ff73, 0x00008000,
+ 0xe0524000, 0x721e0000,
+ 0xe0524100, 0x721e0100,
+ 0xe0524200, 0x721e0200,
+ 0xe0524300, 0x721e0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8072ff72, 0x00000400,
+ 0xbf0a737c, 0xbf85ffee,
+ 0xbf9c0000, 0xe0524000,
+ 0x761e0000, 0xe0524100,
+ 0x761e0100, 0xe0524200,
+ 0x761e0200, 0xe0524300,
+ 0x761e0300, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0x80f2c072, 0xb8f31605,
+ 0x80738173, 0x8e738473,
+ 0x8e7a8273, 0xbefa00ff,
+ 0x01000000, 0xbefc0073,
+ 0xc031003c, 0x00000072,
+ 0x80f2c072, 0xbf8c007f,
+ 0x80fc907c, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff1, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xc0211cfc,
+ 0x00000072, 0x80728472,
+ 0xc0211c3c, 0x00000072,
+ 0x80728472, 0xc0211c7c,
+ 0x00000072, 0x80728472,
+ 0xc0211bbc, 0x00000072,
+ 0x80728472, 0xc0211bfc,
+ 0x00000072, 0x80728472,
+ 0xc0211d3c, 0x00000072,
+ 0x80728472, 0xc0211d7c,
+ 0x00000072, 0x80728472,
+ 0xc0211a3c, 0x00000072,
+ 0x80728472, 0xc0211a7c,
+ 0x00000072, 0x80728472,
+ 0xc0211dfc, 0x00000072,
+ 0x80728472, 0xc0211b3c,
+ 0x00000072, 0x80728472,
+ 0xc0211b7c, 0x00000072,
+ 0x80728472, 0xbf8c007f,
+ 0x8671ff71, 0x0000ffff,
+ 0xbefc0073, 0xbefe006e,
+ 0xbeff006f, 0x867375ff,
+ 0x000003ff, 0xb9734803,
+ 0x867375ff, 0xfffff800,
+ 0x8f738b73, 0xb973a2c3,
+ 0xb977f801, 0x8673ff71,
+ 0xf0000000, 0x8f739c73,
+ 0x8e739073, 0xbef60080,
+ 0x87767376, 0x8673ff71,
+ 0x08000000, 0x8f739b73,
+ 0x8e738f73, 0x87767376,
+ 0x8673ff74, 0x00800000,
+ 0x8f739773, 0xb976f807,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb974f802, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
+};
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 505d39156acd..62c3d9cd6ef1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
return -EPERM;
}
- process = kfd_create_process(current);
+ process = kfd_create_process(filep);
if (IS_ERR(process))
return PTR_ERR(process);
@@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+ q_properties->ctl_stack_size = args->ctl_stack_size;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -431,6 +432,38 @@ out:
return err;
}
+static int kfd_ioctl_set_trap_handler(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_set_trap_handler_args *args = data;
+ struct kfd_dev *dev;
+ int err = 0;
+ struct kfd_process_device *pdd;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = -ESRCH;
+ goto out;
+ }
+
+ if (dev->dqm->ops.set_trap_handler(dev->dqm,
+ &pdd->qpd,
+ args->tba_addr,
+ args->tma_addr))
+ err = -EINVAL;
+
+out:
+ mutex_unlock(&p->mutex);
+
+ return err;
+}
+
static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -493,7 +526,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
long status;
dev = kfd_device_by_id(args->gpu_id);
- if (!dev)
+ if (!dev || !dev->dbgmgr)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -979,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
kfd_ioctl_set_scratch_backing_va, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
- kfd_ioctl_get_tile_config, 0)
+ kfd_ioctl_get_tile_config, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
+ kfd_ioctl_set_trap_handler, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1088,6 +1124,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
KFD_MMAP_EVENTS_MASK) {
vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
return kfd_event_mmap(process, vma);
+ } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
+ KFD_MMAP_RESERVED_MEM_MASK) {
+ vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
+ return kfd_reserved_mem_mmap(process, vma);
}
return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
new file mode 100644
index 000000000000..2bc2816767a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -0,0 +1,1267 @@
+/*
+ * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/amd-iommu.h>
+#include "kfd_crat.h"
+#include "kfd_priv.h"
+#include "kfd_topology.h"
+
+/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
+ * GPU processor ID are expressed with Bit[31]=1.
+ * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
+ * used in the CRAT.
+ */
+static uint32_t gpu_processor_id_low = 0x80001000;
+
+/* Return the next available gpu_processor_id and increment it for the next GPU
+ * @total_cu_count - Total CUs present in the GPU including ones
+ * masked off
+ */
+static inline unsigned int get_and_inc_gpu_processor_id(
+ unsigned int total_cu_count)
+{
+ int current_id = gpu_processor_id_low;
+
+ gpu_processor_id_low += total_cu_count;
+ return current_id;
+}
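+
+/* Example: a first GPU with 64 total CUs is assigned processor ID
+ * 0x80001000, and gpu_processor_id_low advances to 0x80001040 for the
+ * next GPU.
+ */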
+
+/* Static table to describe GPU Cache information */
+struct kfd_gpu_cache_info {
+ uint32_t cache_size;
+ uint32_t cache_level;
+ uint32_t flags;
+ /* Indicates how many Compute Units share this cache
+ * Value = 1 indicates the cache is not shared
+ */
+ uint32_t num_cu_shared;
+};
+
+static struct kfd_gpu_cache_info kaveri_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+
+ },
+ {
+ /* Scalar L1 Instruction Cache (in SQC module) per bank */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache (in SQC module) per bank */
+ .cache_size = 8,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+
+ /* TODO: Add L2 Cache information */
+};
+
+
+static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache (in SQC module) per bank */
+ .cache_size = 8,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 4,
+ },
+ {
+ /* Scalar L1 Data Cache (in SQC module) per bank. */
+ .cache_size = 4,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 4,
+ },
+
+ /* TODO: Add L2 Cache information */
+};
+
+/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
+ * the following ASICs may need a separate table.
+ */
+#define hawaii_cache_info kaveri_cache_info
+#define tonga_cache_info carrizo_cache_info
+#define fiji_cache_info carrizo_cache_info
+#define polaris10_cache_info carrizo_cache_info
+#define polaris11_cache_info carrizo_cache_info
+
+static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
+ struct crat_subtype_computeunit *cu)
+{
+ dev->node_props.cpu_cores_count = cu->num_cpu_cores;
+ dev->node_props.cpu_core_id_base = cu->processor_id_low;
+ if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+
+ pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
+ cu->processor_id_low);
+}
+
+static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+ struct crat_subtype_computeunit *cu)
+{
+ dev->node_props.simd_id_base = cu->processor_id_low;
+ dev->node_props.simd_count = cu->num_simd_cores;
+ dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
+ dev->node_props.max_waves_per_simd = cu->max_waves_simd;
+ dev->node_props.wave_front_size = cu->wave_front_size;
+ dev->node_props.array_count = cu->array_count;
+ dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
+ dev->node_props.simd_per_cu = cu->num_simd_per_cu;
+ dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
+ if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
+ dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
+ pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
+}
+
+/* kfd_parse_subtype_cu - parse compute unit subtypes and attach them to the
+ * correct topology device present in the device_list
+ */
+static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
+ struct list_head *device_list)
+{
+ struct kfd_topology_device *dev;
+
+ pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
+ cu->proximity_domain, cu->hsa_capability);
+ list_for_each_entry(dev, device_list, list) {
+ if (cu->proximity_domain == dev->proximity_domain) {
+ if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
+ kfd_populated_cu_info_cpu(dev, cu);
+
+ if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
+ kfd_populated_cu_info_gpu(dev, cu);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* kfd_parse_subtype_mem - parse memory subtypes and attach them to the
+ * correct topology device present in the device_list
+ */
+static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+ struct list_head *device_list)
+{
+ struct kfd_mem_properties *props;
+ struct kfd_topology_device *dev;
+
+ pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
+ mem->proximity_domain);
+ list_for_each_entry(dev, device_list, list) {
+ if (mem->proximity_domain == dev->proximity_domain) {
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ /* We're on GPU node */
+ if (dev->node_props.cpu_cores_count == 0) {
+ /* APU */
+ if (mem->visibility_type == 0)
+ props->heap_type =
+ HSA_MEM_HEAP_TYPE_FB_PRIVATE;
+ /* dGPU */
+ else
+ props->heap_type = mem->visibility_type;
+ } else
+ props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+
+ if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
+ props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
+ if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
+ props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
+
+ props->size_in_bytes =
+ ((uint64_t)mem->length_high << 32) +
+ mem->length_low;
+ props->width = mem->width;
+
+ dev->node_props.mem_banks_count++;
+ list_add_tail(&props->list, &dev->mem_props);
+
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* kfd_parse_subtype_cache - parse cache subtypes and attach them to the
+ * correct topology device present in the device_list
+ */
+static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+ struct list_head *device_list)
+{
+ struct kfd_cache_properties *props;
+ struct kfd_topology_device *dev;
+ uint32_t id;
+ uint32_t total_num_of_cu;
+
+ id = cache->processor_id_low;
+
+ pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
+ list_for_each_entry(dev, device_list, list) {
+ total_num_of_cu = (dev->node_props.array_count *
+ dev->node_props.cu_per_simd_array);
+
+		/* Cache information in CRAT doesn't have proximity_domain
+ * information as it is associated with a CPU core or GPU
+ * Compute Unit. So map the cache using CPU core Id or SIMD
+ * (GPU) ID.
+ * TODO: This works because currently we can safely assume that
+ * Compute Units are parsed before caches are parsed. In
+ * future, remove this dependency
+ */
+ if ((id >= dev->node_props.cpu_core_id_base &&
+ id <= dev->node_props.cpu_core_id_base +
+ dev->node_props.cpu_cores_count) ||
+ (id >= dev->node_props.simd_id_base &&
+ id < dev->node_props.simd_id_base +
+ total_num_of_cu)) {
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ props->processor_id_low = id;
+ props->cache_level = cache->cache_level;
+ props->cache_size = cache->cache_size;
+ props->cacheline_size = cache->cache_line_size;
+ props->cachelines_per_tag = cache->lines_per_tag;
+ props->cache_assoc = cache->associativity;
+ props->cache_latency = cache->cache_latency;
+ memcpy(props->sibling_map, cache->sibling_map,
+ sizeof(props->sibling_map));
+
+ if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+ props->cache_type |= HSA_CACHE_TYPE_DATA;
+ if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+ props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+ if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+ props->cache_type |= HSA_CACHE_TYPE_CPU;
+ if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+ props->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+ dev->cache_count++;
+ dev->node_props.caches_count++;
+ list_add_tail(&props->list, &dev->cache_props);
+
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* kfd_parse_subtype_iolink - parse iolink subtypes and attach them to the
+ * correct topology device present in the device_list
+ */
+static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ struct list_head *device_list)
+{
+ struct kfd_iolink_properties *props = NULL, *props2;
+ struct kfd_topology_device *dev, *cpu_dev;
+ uint32_t id_from;
+ uint32_t id_to;
+
+ id_from = iolink->proximity_domain_from;
+ id_to = iolink->proximity_domain_to;
+
+ pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
+ id_from);
+ list_for_each_entry(dev, device_list, list) {
+ if (id_from == dev->proximity_domain) {
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ props->node_from = id_from;
+ props->node_to = id_to;
+ props->ver_maj = iolink->version_major;
+ props->ver_min = iolink->version_minor;
+ props->iolink_type = iolink->io_interface_type;
+
+ if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+ props->weight = 20;
+ else
+ props->weight = node_distance(id_from, id_to);
+
+ props->min_latency = iolink->minimum_latency;
+ props->max_latency = iolink->maximum_latency;
+ props->min_bandwidth = iolink->minimum_bandwidth_mbs;
+ props->max_bandwidth = iolink->maximum_bandwidth_mbs;
+ props->rec_transfer_size =
+ iolink->recommended_transfer_size;
+
+ dev->io_link_count++;
+ dev->node_props.io_links_count++;
+ list_add_tail(&props->list, &dev->io_link_props);
+ break;
+ }
+ }
+
+ /* CPU topology is created before GPUs are detected, so CPU->GPU
+ * links are not built at that time. If a PCIe type is discovered, it
+ * means a GPU is detected and we are adding GPU->CPU to the topology.
+	 * At this time, also add the corresponding CPU->GPU link.
+ */
+ if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
+ cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
+ if (!cpu_dev)
+ return -ENODEV;
+ /* same everything but the other direction */
+		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+		if (!props2)
+			return -ENOMEM;
+ props2->node_from = id_to;
+ props2->node_to = id_from;
+ props2->kobj = NULL;
+ cpu_dev->io_link_count++;
+ cpu_dev->node_props.io_links_count++;
+ list_add_tail(&props2->list, &cpu_dev->io_link_props);
+ }
+
+ return 0;
+}
+
+/* kfd_parse_subtype - parse subtypes and attach them to the correct topology
+ * device present in the device_list
+ * @sub_type_hdr - subtype section of crat_image
+ * @device_list - list of topology devices present in this crat_image
+ */
+static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+ struct list_head *device_list)
+{
+ struct crat_subtype_computeunit *cu;
+ struct crat_subtype_memory *mem;
+ struct crat_subtype_cache *cache;
+ struct crat_subtype_iolink *iolink;
+ int ret = 0;
+
+ switch (sub_type_hdr->type) {
+ case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+ cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+ ret = kfd_parse_subtype_cu(cu, device_list);
+ break;
+ case CRAT_SUBTYPE_MEMORY_AFFINITY:
+ mem = (struct crat_subtype_memory *)sub_type_hdr;
+ ret = kfd_parse_subtype_mem(mem, device_list);
+ break;
+ case CRAT_SUBTYPE_CACHE_AFFINITY:
+ cache = (struct crat_subtype_cache *)sub_type_hdr;
+ ret = kfd_parse_subtype_cache(cache, device_list);
+ break;
+ case CRAT_SUBTYPE_TLB_AFFINITY:
+ /*
+ * For now, nothing to do here
+ */
+ pr_debug("Found TLB entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+ /*
+ * For now, nothing to do here
+ */
+ pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_IOLINK_AFFINITY:
+ iolink = (struct crat_subtype_iolink *)sub_type_hdr;
+ ret = kfd_parse_subtype_iolink(iolink, device_list);
+ break;
+ default:
+ pr_warn("Unknown subtype %d in CRAT\n",
+ sub_type_hdr->type);
+ }
+
+ return ret;
+}
+
+/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
+ * create a kfd_topology_device and add it to device_list. Also parse
+ * CRAT subtypes and attach them to the appropriate kfd_topology_device
+ * @crat_image - input image containing CRAT
+ * @device_list - [OUT] list of kfd_topology_device generated after
+ * parsing crat_image
+ * @proximity_domain - Proximity domain of the first device in the table
+ *
+ * Return - 0 if successful, else a negative errno value
+ */
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+ uint32_t proximity_domain)
+{
+ struct kfd_topology_device *top_dev = NULL;
+ struct crat_subtype_generic *sub_type_hdr;
+ uint16_t node_id;
+ int ret = 0;
+ struct crat_header *crat_table = (struct crat_header *)crat_image;
+ uint16_t num_nodes;
+ uint32_t image_len;
+
+ if (!crat_image)
+ return -EINVAL;
+
+ if (!list_empty(device_list)) {
+ pr_warn("Error device list should be empty\n");
+ return -EINVAL;
+ }
+
+ num_nodes = crat_table->num_domains;
+ image_len = crat_table->length;
+
+ pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+
+ for (node_id = 0; node_id < num_nodes; node_id++) {
+ top_dev = kfd_create_topology_device(device_list);
+ if (!top_dev)
+ break;
+ top_dev->proximity_domain = proximity_domain++;
+ }
+
+ if (!top_dev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
+ memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
+ CRAT_OEMTABLEID_LENGTH);
+ top_dev->oem_revision = crat_table->oem_revision;
+
+ sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+ while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
+ ((char *)crat_image) + image_len) {
+ if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
+ ret = kfd_parse_subtype(sub_type_hdr, device_list);
+ if (ret)
+ break;
+ }
+
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+ }
+
+err:
+ if (ret)
+ kfd_release_topology_device_list(device_list);
+
+ return ret;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_pcache(struct crat_subtype_cache *pcache,
+ struct kfd_gpu_cache_info *pcache_info,
+ struct kfd_cu_info *cu_info,
+ int mem_available,
+ int cu_bitmask,
+ int cache_type, unsigned int cu_processor_id,
+ int cu_block)
+{
+ unsigned int cu_sibling_map_mask;
+ int first_active_cu;
+
+ /* First check if enough memory is available */
+ if (sizeof(struct crat_subtype_cache) > mem_available)
+ return -ENOMEM;
+
+ cu_sibling_map_mask = cu_bitmask;
+ cu_sibling_map_mask >>= cu_block;
+ cu_sibling_map_mask &=
+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* A CU can be inactive. For a shared cache, find the first active
+	 * CU; for a non-shared cache, check whether the CU is inactive and,
+	 * if so, skip it
+ */
+ if (first_active_cu) {
+ memset(pcache, 0, sizeof(struct crat_subtype_cache));
+ pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+ pcache->length = sizeof(struct crat_subtype_cache);
+ pcache->flags = pcache_info[cache_type].flags;
+ pcache->processor_id_low = cu_processor_id
+ + (first_active_cu - 1);
+ pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+ * inactive CU
+ */
+ cu_sibling_map_mask =
+ cu_sibling_map_mask >> (first_active_cu - 1);
+
+ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[1] =
+ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[2] =
+ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[3] =
+ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ return 0;
+ }
+ return 1;
+}
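+
+/* Example: with num_cu_shared = 4 and cu_block = 4, a cu_bitmask of 0x30
+ * gives cu_sibling_map_mask = (0x30 >> 4) & 0xF = 0x3; ffs() returns 1, so
+ * the cache is reported at cu_processor_id with sibling_map[0] = 0x3.
+ */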
+
+/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ *
+ * @kdev - [IN] GPU device
+ * @gpu_processor_id - [IN] GPU processor ID with which these caches
+ *			are associated
+ * @available_size - [IN] Amount of memory available in pcache
+ * @cu_info - [IN] Compute Unit info obtained from KGD
+ * @pcache - [OUT] memory into which cache data is to be filled in.
+ * @size_filled - [OUT] amount of data used up in pcache.
+ * @num_of_entries - [OUT] number of caches added
+ */
+static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
+ int gpu_processor_id,
+ int available_size,
+ struct kfd_cu_info *cu_info,
+ struct crat_subtype_cache *pcache,
+ int *size_filled,
+ int *num_of_entries)
+{
+ struct kfd_gpu_cache_info *pcache_info;
+ int num_of_cache_types = 0;
+ int i, j, k;
+ int ct = 0;
+ int mem_available = available_size;
+ unsigned int cu_processor_id;
+ int ret;
+
+ switch (kdev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ pcache_info = kaveri_cache_info;
+ num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+ break;
+ case CHIP_HAWAII:
+ pcache_info = hawaii_cache_info;
+ num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+ break;
+ case CHIP_CARRIZO:
+ pcache_info = carrizo_cache_info;
+ num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+ break;
+ case CHIP_TONGA:
+ pcache_info = tonga_cache_info;
+ num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+ break;
+ case CHIP_FIJI:
+ pcache_info = fiji_cache_info;
+ num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+ break;
+ case CHIP_POLARIS10:
+ pcache_info = polaris10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+ break;
+ case CHIP_POLARIS11:
+ pcache_info = polaris11_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *size_filled = 0;
+ *num_of_entries = 0;
+
+ /* For each type of cache listed in the kfd_gpu_cache_info table,
+ * go through all available Compute Units.
+ * The [i,j,k] loop
+ * if kfd_gpu_cache_info.num_cu_shared == 1
+ * walks through every available CU
+ * if kfd_gpu_cache_info.num_cu_shared != 1
+ * considers only one CU from each shared unit
+ */
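+ /* Example: with num_cu_shared = 2 and num_cu_per_sh = 8, k steps
+ * through 0, 2, 4, 6 and cu_processor_id advances by 2 per block,
+ * so each cache entry covers one two-CU block.
+ */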
+
+ for (ct = 0; ct < num_of_cache_types; ct++) {
+ cu_processor_id = gpu_processor_id;
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+ j++) {
+ for (k = 0; k < cu_info->num_cu_per_sh;
+ k += pcache_info[ct].num_cu_shared) {
+
+ ret = fill_in_pcache(pcache,
+ pcache_info,
+ cu_info,
+ mem_available,
+ cu_info->cu_bitmap[i][j],
+ ct,
+ cu_processor_id,
+ k);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ pcache++;
+ (*num_of_entries)++;
+ mem_available -=
+ sizeof(*pcache);
+ (*size_filled) +=
+ sizeof(*pcache);
+ }
+
+ /* Move to next CU block */
+ cu_processor_id +=
+ pcache_info[ct].num_cu_shared;
+ }
+ }
+ }
+ }
+
+ pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
+
+ return 0;
+}
+
+/*
+ * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
+ * copies CRAT from ACPI (if available).
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
+ * crat_image will be NULL
+ * @size: [OUT] size of crat_image
+ *
+ * Return 0 if successful else return error code
+ */
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+{
+ struct acpi_table_header *crat_table;
+ acpi_status status;
+ void *pcrat_image;
+
+ if (!crat_image)
+ return -EINVAL;
+
+ *crat_image = NULL;
+
+ /* Fetch the CRAT table from ACPI */
+ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+ if (status == AE_NOT_FOUND) {
+ pr_warn("CRAT table not found\n");
+ return -ENODATA;
+ } else if (ACPI_FAILURE(status)) {
+ const char *err = acpi_format_exception(status);
+
+ pr_err("CRAT table error: %s\n", err);
+ return -EINVAL;
+ }
+
+ if (ignore_crat) {
+ pr_info("CRAT table disabled by module option\n");
+ return -ENODATA;
+ }
+
+ pcrat_image = kmalloc(crat_table->length, GFP_KERNEL);
+ if (!pcrat_image)
+ return -ENOMEM;
+
+ memcpy(pcrat_image, crat_table, crat_table->length);
+
+ *crat_image = pcrat_image;
+ *size = crat_table->length;
+
+ return 0;
+}
+
+/* Memory required to create Virtual CRAT.
+ * Since there is no easy way to predict the amount of memory required, the
+ * following amounts are allocated for the CPU and GPU Virtual CRAT. This is
+ * expected to cover all known conditions. To be safe, additional checks are
+ * put in the code to ensure we don't overwrite beyond the allocated size.
+ */
+#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE)
+#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE)
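+/* With the common 4 KiB PAGE_SIZE this amounts to 8 KiB for the CPU
+ * image and 12 KiB for the GPU image.
+ */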
+
+/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
+ *
+ * @numa_node_id: CPU NUMA node id
+ * @avail_size: Available size in the memory
+ * @proximity_domain: Proximity domain of the NUMA node
+ * @sub_type_hdr: Memory into which compute info will be filled in
+ *
+ * Return 0 if successful else return -ve value
+ */
+static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+ int proximity_domain,
+ struct crat_subtype_computeunit *sub_type_hdr)
+{
+ const struct cpumask *cpumask;
+
+ *avail_size -= sizeof(struct crat_subtype_computeunit);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+ /* Fill in subtype header data */
+ sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+ sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ cpumask = cpumask_of_node(numa_node_id);
+
+ /* Fill in CU data */
+ sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
+ sub_type_hdr->proximity_domain = proximity_domain;
+ sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
+ if (sub_type_hdr->processor_id_low == -1)
+ return -EINVAL;
+
+ sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
+
+ return 0;
+}
+
+/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
+ *
+ * @numa_node_id: CPU NUMA node id
+ * @avail_size: Available size in the memory
+ * @proximity_domain: Proximity domain of the NUMA node
+ * @sub_type_hdr: Memory into which memory info will be filled in
+ *
+ * Return 0 if successful else return -ve value
+ */
+static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+ int proximity_domain,
+ struct crat_subtype_memory *sub_type_hdr)
+{
+ uint64_t mem_in_bytes = 0;
+ pg_data_t *pgdat;
+ int zone_type;
+
+ *avail_size -= sizeof(struct crat_subtype_memory);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+
+ /* Fill in subtype header data */
+ sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+ sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill in Memory Subunit data */
+
+ /* Unlike si_meminfo, si_meminfo_node is not exported, so the
+ * following lines are duplicated from the si_meminfo_node
+ * function.
+ */
+ pgdat = NODE_DATA(numa_node_id);
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+ mem_in_bytes <<= PAGE_SHIFT;
+
+ sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
+ sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
+ sub_type_hdr->proximity_domain = proximity_domain;
+
+ return 0;
+}
+
+static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
+ uint32_t *num_entries,
+ struct crat_subtype_iolink *sub_type_hdr)
+{
+ int nid;
+ struct cpuinfo_x86 *c = &cpu_data(0);
+ uint8_t link_type;
+
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
+ else
+ link_type = CRAT_IOLINK_TYPE_QPI_1_1;
+
+ *num_entries = 0;
+
+ /* Create IO links from this node to other CPU nodes */
+ for_each_online_node(nid) {
+ if (nid == numa_node_id) /* node itself */
+ continue;
+
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ /* Fill in subtype header data */
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill in IO link data */
+ sub_type_hdr->proximity_domain_from = numa_node_id;
+ sub_type_hdr->proximity_domain_to = nid;
+ sub_type_hdr->io_interface_type = link_type;
+
+ (*num_entries)++;
+ sub_type_hdr++;
+ }
+
+ return 0;
+}
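+
+/* Example (derived from the loop above): on a four-node system, node 0
+ * emits three links (0->1, 0->2, 0->3), i.e. each node contributes one
+ * iolink subtype per remote online node.
+ */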
+
+/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
+ *
+ * @pcrat_image: Fill in VCRAT for CPU
+ * @size: [IN] allocated size of crat_image.
+ * [OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+{
+ struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct acpi_table_header *acpi_table;
+ acpi_status status;
+ struct crat_subtype_generic *sub_type_hdr;
+ int avail_size = *size;
+ int numa_node_id;
+ uint32_t entries = 0;
+ int ret = 0;
+
+ if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
+ return -EINVAL;
+
+ /* Fill in CRAT Header.
+ * Modify length and total_entries as subunits are added.
+ */
+ avail_size -= sizeof(struct crat_header);
+ if (avail_size < 0)
+ return -ENOMEM;
+
+ memset(crat_table, 0, sizeof(struct crat_header));
+ memcpy(&crat_table->signature, CRAT_SIGNATURE,
+ sizeof(crat_table->signature));
+ crat_table->length = sizeof(struct crat_header);
+
+ status = acpi_get_table("DSDT", 0, &acpi_table);
+ if (status == AE_NOT_FOUND)
+ pr_warn("DSDT table not found for OEM information\n");
+ else {
+ crat_table->oem_revision = acpi_table->revision;
+ memcpy(crat_table->oem_id, acpi_table->oem_id,
+ CRAT_OEMID_LENGTH);
+ memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
+ CRAT_OEMTABLEID_LENGTH);
+ }
+ crat_table->total_entries = 0;
+ crat_table->num_domains = 0;
+
+ sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+
+ for_each_online_node(numa_node_id) {
+ if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
+ continue;
+
+ /* Fill in Subtype: Compute Unit */
+ ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
+ crat_table->num_domains,
+ (struct crat_subtype_computeunit *)sub_type_hdr);
+ if (ret < 0)
+ return ret;
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+
+ /* Fill in Subtype: Memory */
+ ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
+ crat_table->num_domains,
+ (struct crat_subtype_memory *)sub_type_hdr);
+ if (ret < 0)
+ return ret;
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+
+ /* Fill in Subtype: IO Link */
+ ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
+ &entries,
+ (struct crat_subtype_iolink *)sub_type_hdr);
+ if (ret < 0)
+ return ret;
+ crat_table->length += (sub_type_hdr->length * entries);
+ crat_table->total_entries += entries;
+
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length * entries);
+
+ crat_table->num_domains++;
+ }
+
+ /* TODO: Add cache Subtype for CPU.
+ * Currently, CPU cache information is available in function
+ * detect_cache_attributes(cpu) defined in the file
+ * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
+ * exported and to get the same information the code needs to be
+ * duplicated.
+ */
+
+ *size = crat_table->length;
+ pr_info("Virtual CRAT table created for CPU\n");
+
+ return 0;
+}
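+
+/* Resulting layout (sketch): one crat_header followed, for each online
+ * NUMA node with a valid APIC id, by one compute-unit subtype, one
+ * memory subtype and one iolink subtype per remote node.
+ */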
+
+static int kfd_fill_gpu_memory_affinity(int *avail_size,
+ struct kfd_dev *kdev, uint8_t type, uint64_t size,
+ struct crat_subtype_memory *sub_type_hdr,
+ uint32_t proximity_domain,
+ const struct kfd_local_mem_info *local_mem_info)
+{
+ *avail_size -= sizeof(struct crat_subtype_memory);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+ sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ sub_type_hdr->proximity_domain = proximity_domain;
+
+ pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
+ type, size);
+
+ sub_type_hdr->length_low = lower_32_bits(size);
+ sub_type_hdr->length_high = upper_32_bits(size);
+
+ sub_type_hdr->width = local_mem_info->vram_width;
+ sub_type_hdr->visibility_type = type;
+
+ return 0;
+}
+
+/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
+ * to its NUMA node
+ * @avail_size: Available size in the memory
+ * @kdev - [IN] GPU device
+ * @sub_type_hdr: Memory into which io link info will be filled in
+ * @proximity_domain - proximity domain of the GPU node
+ *
+ * Return 0 if successful else return -ve value
+ */
+static int kfd_fill_gpu_direct_io_link(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain)
+{
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ /* Fill in subtype header data */
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill in IOLINK subtype.
+ * TODO: Fill-in other fields of iolink subtype
+ */
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
+ sub_type_hdr->proximity_domain_from = proximity_domain;
+#ifdef CONFIG_NUMA
+ if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+ sub_type_hdr->proximity_domain_to = 0;
+ else
+ sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+#else
+ sub_type_hdr->proximity_domain_to = 0;
+#endif
+ return 0;
+}
+
+/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
+ *
+ * @pcrat_image: Fill in VCRAT for GPU
+ * @size: [IN] allocated size of crat_image.
+ * [OUT] actual size of data filled in crat_image
+ * @kdev: GPU device for which VCRAT is created
+ * @proximity_domain: proximity domain of the GPU node
+ */
+static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ size_t *size, struct kfd_dev *kdev,
+ uint32_t proximity_domain)
+{
+ struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct crat_subtype_generic *sub_type_hdr;
+ struct crat_subtype_computeunit *cu;
+ struct kfd_cu_info cu_info;
+ struct amd_iommu_device_info iommu_info;
+ int avail_size = *size;
+ uint32_t total_num_of_cu;
+ int num_of_cache_entries = 0;
+ int cache_mem_filled = 0;
+ int ret = 0;
+ const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
+ AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
+ AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+ struct kfd_local_mem_info local_mem_info;
+
+ if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
+ return -EINVAL;
+
+ /* Fill the CRAT Header.
+ * Modify length and total_entries as subunits are added.
+ */
+ avail_size -= sizeof(struct crat_header);
+ if (avail_size < 0)
+ return -ENOMEM;
+
+ memset(crat_table, 0, sizeof(struct crat_header));
+
+ memcpy(&crat_table->signature, CRAT_SIGNATURE,
+ sizeof(crat_table->signature));
+ /* Change length as we add more subtypes */
+ crat_table->length = sizeof(struct crat_header);
+ crat_table->num_domains = 1;
+ crat_table->total_entries = 0;
+
+ /* Fill in Subtype: Compute Unit
+ * First fill in the sub type header and then sub type data
+ */
+ avail_size -= sizeof(struct crat_subtype_computeunit);
+ if (avail_size < 0)
+ return -ENOMEM;
+
+ sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
+ memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+ sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+ sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill CU subtype data */
+ cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+ cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
+ cu->proximity_domain = proximity_domain;
+
+ kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info);
+ cu->num_simd_per_cu = cu_info.simd_per_cu;
+ cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
+ cu->max_waves_simd = cu_info.max_waves_per_simd;
+
+ cu->wave_front_size = cu_info.wave_front_size;
+ cu->array_count = cu_info.num_shader_arrays_per_engine *
+ cu_info.num_shader_engines;
+ total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+ cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+ cu->num_cu_per_array = cu_info.num_cu_per_sh;
+ cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
+ cu->num_banks = cu_info.num_shader_engines;
+ cu->lds_size_in_kb = cu_info.lds_size;
+
+ cu->hsa_capability = 0;
+
+ /* Check if this node supports IOMMU. During parsing this flag will
+ * translate to HSA_CAP_ATS_PRESENT
+ */
+ iommu_info.flags = 0;
+ if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
+ if ((iommu_info.flags & required_iommu_flags) ==
+ required_iommu_flags)
+ cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
+ }
+
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+
+ /* Fill in Subtype: Memory. Only on systems with large BAR (no
+ * private FB), report memory as public. On other systems
+ * report the total FB size (public+private) as a single
+ * private heap.
+ */
+ kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info);
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+
+ if (local_mem_info.local_mem_size_private == 0)
+ ret = kfd_fill_gpu_memory_affinity(&avail_size,
+ kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
+ local_mem_info.local_mem_size_public,
+ (struct crat_subtype_memory *)sub_type_hdr,
+ proximity_domain,
+ &local_mem_info);
+ else
+ ret = kfd_fill_gpu_memory_affinity(&avail_size,
+ kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
+ local_mem_info.local_mem_size_public +
+ local_mem_info.local_mem_size_private,
+ (struct crat_subtype_memory *)sub_type_hdr,
+ proximity_domain,
+ &local_mem_info);
+ if (ret < 0)
+ return ret;
+
+ crat_table->length += sizeof(struct crat_subtype_memory);
+ crat_table->total_entries++;
+
+ /* TODO: Fill in cache information. This information is NOT readily
+ * available in KGD
+ */
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+ ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
+ avail_size,
+ &cu_info,
+ (struct crat_subtype_cache *)sub_type_hdr,
+ &cache_mem_filled,
+ &num_of_cache_entries);
+
+ if (ret < 0)
+ return ret;
+
+ crat_table->length += cache_mem_filled;
+ crat_table->total_entries += num_of_cache_entries;
+ avail_size -= cache_mem_filled;
+
+ /* Fill in Subtype: IO_LINKS
+ * Only direct links are added here, i.e. the link from the GPU
+ * to its NUMA node. Indirect links are added by userspace.
+ */
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ cache_mem_filled);
+ ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+ if (ret < 0)
+ return ret;
+
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+
+ *size = crat_table->length;
+ pr_info("Virtual CRAT table created for GPU\n");
+
+ return ret;
+}
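+
+/* Resulting layout (sketch): crat_header, one compute-unit subtype, one
+ * memory subtype, the per-ASIC cache subtypes and a single PCIe iolink
+ * subtype, with crat_table->length tracking the running total.
+ */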
+
+/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
+ * creates a Virtual CRAT (VCRAT) image
+ *
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ * @crat_image: VCRAT image created because ACPI does not have a
+ * CRAT for this device
+ * @size: [OUT] size of virtual crat_image
+ * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+ * COMPUTE_UNIT_GPU - Create VCRAT for GPU
+ * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+ * -- this option is not currently implemented.
+ * The assumption is that all AMD APUs will have CRAT
+ * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *
+ * Return 0 if successful else return -ve value
+ */
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+ int flags, struct kfd_dev *kdev,
+ uint32_t proximity_domain)
+{
+ void *pcrat_image = NULL;
+ int ret = 0;
+
+ if (!crat_image)
+ return -EINVAL;
+
+ *crat_image = NULL;
+
+ /* Allocate VCRAT_SIZE_FOR_CPU for a CPU virtual CRAT image and
+ * VCRAT_SIZE_FOR_GPU for a GPU virtual CRAT image. This should cover
+ * all current conditions. Checks are in place so we never write
+ * beyond the allocated size.
+ */
+ switch (flags) {
+ case COMPUTE_UNIT_CPU:
+ pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL);
+ if (!pcrat_image)
+ return -ENOMEM;
+ *size = VCRAT_SIZE_FOR_CPU;
+ ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
+ break;
+ case COMPUTE_UNIT_GPU:
+ if (!kdev)
+ return -EINVAL;
+ pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
+ if (!pcrat_image)
+ return -ENOMEM;
+ *size = VCRAT_SIZE_FOR_GPU;
+ ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
+ proximity_domain);
+ break;
+ case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
+ /* TODO: */
+ ret = -EINVAL;
+ pr_err("VCRAT not implemented for APU\n");
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (!ret)
+ *crat_image = pcrat_image;
+ else
+ kfree(pcrat_image);
+
+ return ret;
+}
+
+/* kfd_destroy_crat_image
+ *
+ * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
+ *
+ */
+void kfd_destroy_crat_image(void *crat_image)
+{
+ kfree(crat_image);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index a374fa3d3ee6..b5cd182b9edd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -44,6 +44,10 @@
#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
+/* Compute Unit flags */
+#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
+#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
+
struct crat_header {
uint32_t signature;
uint32_t length;
@@ -105,7 +109,7 @@ struct crat_subtype_computeunit {
uint8_t wave_front_size;
uint8_t num_banks;
uint16_t micro_engine_id;
- uint8_t num_arrays;
+ uint8_t array_count;
uint8_t num_cu_per_array;
uint8_t num_simd_per_cu;
uint8_t max_slots_scatch_cu;
@@ -127,13 +131,14 @@ struct crat_subtype_memory {
uint8_t length;
uint16_t reserved;
uint32_t flags;
- uint32_t promixity_domain;
+ uint32_t proximity_domain;
uint32_t base_addr_low;
uint32_t base_addr_high;
uint32_t length_low;
uint32_t length_high;
uint32_t width;
- uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH];
+ uint8_t visibility_type; /* for virtual (dGPU) CRAT */
+ uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1];
};
/*
@@ -222,9 +227,12 @@ struct crat_subtype_ccompute {
/*
* HSA IO Link Affinity structure and definitions
*/
-#define CRAT_IOLINK_FLAGS_ENABLED 0x00000001
-#define CRAT_IOLINK_FLAGS_COHERENCY 0x00000002
-#define CRAT_IOLINK_FLAGS_RESERVED 0xfffffffc
+#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0)
+#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
+#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0
/*
* IO interface types
@@ -232,10 +240,18 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_TYPE_UNDEFINED 0
#define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1
#define CRAT_IOLINK_TYPE_PCIEXPRESS 2
-#define CRAT_IOLINK_TYPE_OTHER 3
+#define CRAT_IOLINK_TYPE_AMBA 3
+#define CRAT_IOLINK_TYPE_MIPI 4
+#define CRAT_IOLINK_TYPE_QPI_1_1 5
+#define CRAT_IOLINK_TYPE_RESERVED1 6
+#define CRAT_IOLINK_TYPE_RESERVED2 7
+#define CRAT_IOLINK_TYPE_RAPID_IO 8
+#define CRAT_IOLINK_TYPE_INFINIBAND 9
+#define CRAT_IOLINK_TYPE_RESERVED3 10
+#define CRAT_IOLINK_TYPE_OTHER 11
#define CRAT_IOLINK_TYPE_MAX 255
-#define CRAT_IOLINK_RESERVED_LENGTH 24
+#define CRAT_IOLINK_RESERVED_LENGTH 24
struct crat_subtype_iolink {
uint8_t type;
@@ -291,4 +307,14 @@ struct cdit_header {
#pragma pack()
+struct kfd_dev;
+
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
+void kfd_destroy_crat_image(void *crat_image);
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+ uint32_t proximity_domain);
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+ int flags, struct kfd_dev *kdev,
+ uint32_t proximity_domain);
+
#endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c407f6bd9956..afb26f205d29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -95,7 +95,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
ib_packet->control = (1 << 23) | (1 << 31) |
- ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+ ((size_in_bytes / 4) & 0xfffff);
ib_packet->bitfields5.pasid = pasid;
@@ -126,8 +126,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
rm_packet->header.opcode = IT_RELEASE_MEM;
rm_packet->header.type = PM4_TYPE_3;
- rm_packet->header.count = sizeof(struct pm4__release_mem) /
- sizeof(unsigned int) - 2;
+ rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
rm_packet->bitfields2.event_index =
@@ -652,8 +651,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
packets_vec[0].header.type = PM4_TYPE_3;
packets_vec[0].bitfields2.reg_offset =
- GRBM_GFX_INDEX / (sizeof(uint32_t)) -
- USERCONFIG_REG_BASE;
+ GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
packets_vec[0].bitfields2.insert_vmid = 0;
packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
@@ -661,8 +659,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
packets_vec[1].header.count = 1;
packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
packets_vec[1].header.type = PM4_TYPE_3;
- packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
- AMD_CONFIG_REG_BASE;
+ packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
packets_vec[1].bitfields2.insert_vmid = 1;
@@ -678,8 +675,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
packets_vec[2].bitfields2.reg_offset =
- GRBM_GFX_INDEX / (sizeof(uint32_t)) -
- USERCONFIG_REG_BASE;
+ GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
packets_vec[2].bitfields2.insert_vmid = 0;
packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
new file mode 100644
index 000000000000..4bd6ebfaf425
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+#include "kfd_priv.h"
+
+static struct dentry *debugfs_root;
+
+static int kfd_debugfs_open(struct inode *inode, struct file *file)
+{
+ int (*show)(struct seq_file *, void *) = inode->i_private;
+
+ return single_open(file, show, NULL);
+}
+
+static const struct file_operations kfd_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = kfd_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
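+
+/* Usage sketch (assuming debugfs is mounted at /sys/kernel/debug): the
+ * entries created below can be read with plain cat, e.g.
+ * cat /sys/kernel/debug/kfd/mqds
+ * cat /sys/kernel/debug/kfd/hqds
+ * cat /sys/kernel/debug/kfd/rls
+ */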
+
+void kfd_debugfs_init(void)
+{
+ struct dentry *ent;
+
+ debugfs_root = debugfs_create_dir("kfd", NULL);
+ if (!debugfs_root || debugfs_root == ERR_PTR(-ENODEV)) {
+ pr_warn("Failed to create kfd debugfs dir\n");
+ return;
+ }
+
+ ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_mqds_by_process,
+ &kfd_debugfs_fops);
+ if (!ent)
+ pr_warn("Failed to create mqds in kfd debugfs\n");
+
+ ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_hqds_by_device,
+ &kfd_debugfs_fops);
+ if (!ent)
+ pr_warn("Failed to create hqds in kfd debugfs\n");
+
+ ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_rls_by_device,
+ &kfd_debugfs_fops);
+ if (!ent)
+ pr_warn("Failed to create rls in kfd debugfs\n");
+}
+
+void kfd_debugfs_fini(void)
+{
+ debugfs_remove_recursive(debugfs_root);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 621a3b53a038..a8fa33a08de3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -27,6 +27,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
+#include "cwsr_trap_handler_gfx8.asm"
#define MQD_SIZE_ALIGNED 768
@@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = false,
};
static const struct kfd_device_info carrizo_device_info = {
@@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
};
struct kfd_deviceid {
@@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
+static void kfd_cwsr_init(struct kfd_dev *kfd)
+{
+ if (cwsr_enable && kfd->device_info->supports_cwsr) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+
+ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ kfd->cwsr_enabled = true;
+ }
+}
+
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
@@ -224,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
- kfd->vm_info.first_vmid_kfd + 1;
+ /* Verify module parameters regarding the number of mapped processes */
+ if ((hws_max_conc_proc < 0)
+ || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
+ dev_err(kfd_device,
+ "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
+ hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
+ kfd->vm_info.vmid_num_kfd);
+ kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+ } else
+ kfd->max_proc_per_quantum = hws_max_conc_proc;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@@ -286,6 +311,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_iommu_pasid_error;
}
+ kfd_cwsr_init(kfd);
+
if (kfd_resume(kfd))
goto kfd_resume_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e202921c150e..d0693fd8cbf8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd,
- int *allocated_vmid)
+ struct qcm_process_device *qpd)
{
int retval;
@@ -170,9 +169,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (retval)
goto out_unlock;
}
- *allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
+
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -181,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
retval = -EINVAL;
if (retval) {
- if (list_empty(&qpd->queues_list)) {
+ if (list_empty(&qpd->queues_list))
deallocate_vmid(dqm, qpd, q);
- *allocated_vmid = 0;
- }
goto out_unlock;
}
@@ -809,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd, int *allocate_vmid)
+ struct qcm_process_device *qpd)
{
int retval;
struct mqd_manager *mqd;
retval = 0;
- if (allocate_vmid)
- *allocate_vmid = 0;
-
mutex_lock(&dqm->lock);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -846,6 +842,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
@@ -1110,6 +1109,26 @@ out:
return retval;
}
+static int set_trap_handler(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ uint64_t tba_addr,
+ uint64_t tma_addr)
+{
+ uint64_t *tma;
+
+ if (dqm->dev->cwsr_enabled) {
+ /* Jump from CWSR trap handler to user trap */
+ tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+ tma[0] = tba_addr;
+ tma[1] = tma_addr;
+ } else {
+ qpd->tba_addr = tba_addr;
+ qpd->tma_addr = tma_addr;
+ }
+
+ return 0;
+}
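+
+/* Sketch of the resulting TMA layout when CWSR is enabled: the second
+ * page of the CWSR area (at KFD_CWSR_TMA_OFFSET) starts with two 64-bit
+ * words holding the user TBA and TMA, which the CWSR trap handler uses
+ * to chain to the user-mode trap handler.
+ */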
+
static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -1241,6 +1260,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
@@ -1256,6 +1276,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.initialize = initialize_nocpsch;
dqm->ops.uninitialize = uninitialize;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.set_trap_handler = set_trap_handler;
dqm->ops.process_termination = process_termination_nocpsch;
break;
default:
@@ -1290,3 +1311,74 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
dqm->ops.uninitialize(dqm);
kfree(dqm);
}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static void seq_reg_dump(struct seq_file *m,
+ uint32_t (*dump)[2], uint32_t n_regs)
+{
+ uint32_t i, count;
+
+ for (i = 0, count = 0; i < n_regs; i++) {
+ if (count == 0 ||
+ dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
+ seq_printf(m, "%s %08x: %08x",
+ i ? "\n" : "",
+ dump[i][0], dump[i][1]);
+ count = 7;
+ } else {
+ seq_printf(m, " %08x", dump[i][1]);
+ count--;
+ }
+ }
+
+ seq_puts(m, "\n");
+}
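+
+/* Illustrative output: consecutive registers coalesce, up to eight
+ * values per "address: value ..." line; a gap in the register offsets
+ * starts a new line with its own address.
+ */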
+
+int dqm_debugfs_hqds(struct seq_file *m, void *data)
+{
+ struct device_queue_manager *dqm = data;
+ uint32_t (*dump)[2], n_regs;
+ int pipe, queue;
+ int r = 0;
+
+ for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+ int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+ for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
+ if (!test_bit(pipe_offset + queue,
+ dqm->dev->shared_resources.queue_bitmap))
+ continue;
+
+ r = dqm->dev->kfd2kgd->hqd_dump(
+ dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ if (r)
+ break;
+
+ seq_printf(m, " CP Pipe %d, Queue %d\n",
+ pipe, queue);
+ seq_reg_dump(m, dump, n_regs);
+
+ kfree(dump);
+ }
+ }
+
+ for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
+ for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
+ r = dqm->dev->kfd2kgd->hqd_sdma_dump(
+ dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+ if (r)
+ break;
+
+ seq_printf(m, " SDMA Engine %d, RLC %d\n",
+ pipe, queue);
+ seq_reg_dump(m, dump, n_regs);
+
+ kfree(dump);
+ }
+ }
+
+ return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 5b77cb69f732..c61b693bfa8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -84,8 +84,7 @@ struct device_process_node {
struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd,
- int *allocate_vmid);
+ struct qcm_process_device *qpd);
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -123,6 +122,11 @@ struct device_queue_manager_ops {
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
+ int (*set_trap_handler)(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ uint64_t tba_addr,
+ uint64_t tma_addr);
+
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index feb76c235b1a..ebb4da14e3df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -116,8 +116,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell aperture size == 0x%08lX\n",
kfd->shared_resources.doorbell_aperture_size);
- pr_debug("doorbell kernel address == 0x%08lX\n",
- (uintptr_t)kfd->doorbell_kernel_ptr);
+ pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
return 0;
}
@@ -194,8 +193,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
- " kernel address == 0x%08lX\n",
- *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
+ " kernel address == %p\n",
+ *doorbell_off, (kfd->doorbell_kernel_ptr + inx));
return kfd->doorbell_kernel_ptr + inx;
}
@@ -215,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
{
if (db) {
writel(value, db);
- pr_debug("Writing %d to doorbell address 0x%p\n", value, db);
+ pr_debug("Writing %d to doorbell address %p\n", value, db);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index cb92d4b72400..93aae5c1e78b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
- * running so the lookup function returns a locked process.
+ * running so the lookup function increments the process ref count.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
}
mutex_unlock(&p->event_mutex);
- mutex_unlock(&p->mutex);
+ kfd_unref_process(p);
}
static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
@@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
- * running so the lookup function returns a locked process.
+ * running so the lookup function increments the process ref count.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct mm_struct *mm;
@@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
*/
mm = get_task_mm(p->lead_thread);
if (!mm) {
- mutex_unlock(&p->mutex);
+ kfd_unref_process(p);
return; /* Process is exiting */
}
@@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
&memory_exception_data);
mutex_unlock(&p->event_mutex);
- mutex_unlock(&p->mutex);
+ kfd_unref_process(p);
}
void kfd_signal_hw_exception_event(unsigned int pasid)
@@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
/*
* Because we are called from arbitrary context (workqueue) as opposed
* to process context, kfd_process could attempt to exit while we are
- * running so the lookup function returns a locked process.
+ * running so the lookup function increments the process ref count.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
@@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
mutex_unlock(&p->event_mutex);
- mutex_unlock(&p->mutex);
+ kfd_unref_process(p);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index c59384bbbc5f..7377513050e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process)
struct kfd_process_device *pdd;
/*Iterating over all devices*/
- while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
+ while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
id < NUM_OF_SUPPORTED_GPUS) {
+ if (!dev) {
+ id++; /* Skip non GPU devices */
+ continue;
+ }
+
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
pr_err("Failed to create process device data\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 8b0c0645d7c0..5dc6567d4a13 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -218,7 +218,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
rptr = *kq->rptr_kernel;
wptr = *kq->wptr_kernel;
queue_address = (unsigned int *)kq->pq_kernel_addr;
- queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+ queue_size_dwords = kq->queue->properties.queue_size / 4;
pr_debug("rptr: %d\n", rptr);
pr_debug("wptr: %d\n", wptr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index f744caeaee04..3ac72bed4f31 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -50,6 +50,15 @@ module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
@@ -60,6 +69,11 @@ module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+int ignore_crat;
+module_param(ignore_crat, int, 0444);
+MODULE_PARM_DESC(ignore_crat,
+ "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+
static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version,
@@ -114,6 +128,8 @@ static int __init kfd_module_init(void)
kfd_process_create_wq();
+ kfd_debugfs_init();
+
amdkfd_init_completed = 1;
dev_info(kfd_device, "Initialized module\n");
@@ -130,6 +146,7 @@ static void __exit kfd_module_exit(void)
{
amdkfd_init_completed = 0;
+ kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_topology_shutdown();
kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 1f3a6ba7eed2..8972bcfbf701 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -85,6 +85,10 @@ struct mqd_manager {
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id);
+#if defined(CONFIG_DEBUG_FS)
+ int (*debugfs_show_mqd)(struct seq_file *m, void *data);
+#endif
+
struct mutex mqd_mutex;
struct kfd_dev *dev;
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 4728fad3fd74..f8ef4a051e08 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd)
return (struct cik_mqd *)mqd;
}
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ return (struct cik_sdma_rlc_registers *)mqd;
+}
+
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -149,7 +154,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
{
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+ uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
@@ -160,7 +165,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
}
static int update_mqd(struct mqd_manager *mm, void *mqd,
@@ -176,8 +183,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
* Calculating queue size which is log base 2 of actual queue size -1
* dwords and another -1 for ffs
*/
- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
- - 1 - 1;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
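+ /* For power-of-two sizes order_base_2(x) == ffs(x) - 1, so this
+ * matches the old ffs()-based value: e.g. a 4096-byte queue is
+ * 1024 dwords and yields a field value of 9.
+ */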
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
@@ -202,7 +208,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct cik_sdma_rlc_registers *m;
m = get_sdma_mqd(mqd);
- m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4)
<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
@@ -343,8 +349,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
* Calculating queue size which is log base 2 of actual queue
* size -1 dwords
*/
- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
- - 1 - 1;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
@@ -360,15 +365,25 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
return 0;
}
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-{
- struct cik_sdma_rlc_registers *m;
+#if defined(CONFIG_DEBUG_FS)
- m = (struct cik_sdma_rlc_registers *)mqd;
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct cik_mqd), false);
+ return 0;
+}
- return m;
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct cik_sdma_rlc_registers), false);
+ return 0;
}
+#endif
+
+
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
@@ -392,6 +407,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
@@ -400,6 +418,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
@@ -408,6 +429,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
break;
default:
kfree(mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 4ea854f9007b..971aec0637dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -30,7 +30,7 @@
#include "vi_structs.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"
-
+#include "oss/oss_3_0_sh_mask.h"
#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
static inline struct vi_mqd *get_mqd(void *mqd)
@@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd)
return (struct vi_mqd *)mqd;
}
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct vi_sdma_mqd *)mqd;
+}
+
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -84,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_iq_rptr = 1;
+ if (q->tba_addr) {
+ m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+ m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+ m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+ m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+ }
+
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
*mqd = m;
if (gart_addr)
*gart_addr = addr;
@@ -98,7 +125,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
{
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+ uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
@@ -116,8 +143,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
- m->cp_hqd_pq_control |=
- ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -147,7 +173,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
* is safe, giving a maximum field value of 0xA.
*/
m->cp_hqd_eop_control |= min(0xA,
- ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+ order_base_2(q->eop_ring_buffer_size / 4) - 1);
m->cp_hqd_eop_base_addr_lo =
lower_32_bits(q->eop_ring_buffer_address >> 8);
m->cp_hqd_eop_base_addr_hi =
@@ -163,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ m->cp_hqd_ctx_save_control =
+ atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+ mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0);
@@ -234,6 +265,117 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
return retval;
}
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ struct vi_sdma_mqd *m;
+
+ retval = kfd_gtt_sa_allocate(mm->dev,
+ sizeof(struct vi_sdma_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+ memset(m, 0, sizeof(struct vi_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct vi_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+ << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_doorbell =
+ q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0);
+
+ return 0;
+}
+
+/*
+ * The preempt type here is ignored because there is only one way
+ * to preempt the SDMA queue.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct vi_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct vi_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
@@ -257,6 +399,9 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
@@ -265,8 +410,20 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
break;
case KFD_MQD_TYPE_SDMA:
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->load_mqd = load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = destroy_mqd_sdma;
+ mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
break;
default:
kfree(mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 16da8ad02d8b..0ecbd1f9b606 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -45,7 +45,7 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
header.u32All = 0;
header.opcode = opcode;
- header.count = packet_size/sizeof(uint32_t) - 2;
+ header.count = packet_size / 4 - 2;
header.type = PM4_TYPE_3;
return header.u32All;
@@ -55,15 +55,27 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
bool *over_subscription)
{
- unsigned int process_count, queue_count;
+ unsigned int process_count, queue_count, compute_queue_count;
unsigned int map_queue_size;
+ unsigned int max_proc_per_quantum = 1;
+ struct kfd_dev *dev = pm->dqm->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count;
+ compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
- /* check if there is over subscription*/
+ /* check if there is over subscription
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
*over_subscription = false;
- if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
+
+ if (dev->max_proc_per_quantum > 1)
+ max_proc_per_quantum = dev->max_proc_per_quantum;
+
+ if ((process_count > max_proc_per_quantum) ||
+ compute_queue_count > get_queues_num(pm->dqm)) {
*over_subscription = true;
pr_debug("Over subscribed runlist\n");
}
@@ -116,10 +128,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
struct pm4_mes_runlist *packet;
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
if (WARN_ON(!ib))
return -EFAULT;
+ /* Determine the number of processes to map together to HW:
+	 * it cannot exceed the number of VMIDs available to the
+	 * scheduler, and is the smaller of the number of processes in
+	 * the runlist and the kfd module parameter hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
packet = (struct pm4_mes_runlist *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_runlist));
@@ -130,6 +156,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields4.chain = chain ? 1 : 0;
packet->bitfields4.offload_polling = 0;
packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
packet->ordinal2 = lower_32_bits(ib);
packet->bitfields3.ib_base_hi = upper_32_bits(ib);
@@ -251,6 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
*rl_size_bytes = alloc_size_bytes;
+ pm->ib_size_bytes = alloc_size_bytes;
pr_debug("Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->queue_count);
@@ -564,3 +592,26 @@ void pm_release_ib(struct packet_manager *pm)
}
mutex_unlock(&pm->lock);
}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pm_debugfs_runlist(struct seq_file *m, void *data)
+{
+ struct packet_manager *pm = data;
+
+ mutex_lock(&pm->lock);
+
+ if (!pm->allocated) {
+ seq_puts(m, " No active runlist\n");
+ goto out;
+ }
+
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+
+out:
+ mutex_unlock(&pm->lock);
+ return 0;
+}
+
+#endif
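
ib_size_bytes, recorded in pm_create_runlist_ib() above, bounds this dump to the runlist that was actually built rather than the whole allocation. The output shape of seq_hex_dump() with these arguments, using invented word values:

    /* Illustrative dump format (DUMP_PREFIX_OFFSET, 32 bytes per row,
     * 4-byte groups; the values are invented):
     *
     *	 00000000: c0033d00 000000a8 00000000 00000001 ...
     *	 00000020: ...
     */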
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index d6a796144269..15fff4420e53 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void)
struct kfd_dev *dev = NULL;
unsigned int i = 0;
- while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) {
+ while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) {
if (dev && dev->kfd2kgd) {
kfd2kgd = dev->kfd2kgd;
break;
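
The enumerator now returns a status code and hands the device back through an out parameter, so CPU-only topology nodes (where *kdev ends up NULL) can be stepped over instead of terminating the walk. A sketch of the resulting iteration idiom, mirroring the loop above:

    /* Sketch: 0 means a valid topology node was found at @idx (kdev may
     * still be NULL for CPU-only nodes); non-zero means end of list.
     */
    static struct kfd_dev *example_first_device_with_kgd(void)
    {
    	struct kfd_dev *dev = NULL;
    	uint8_t i = 0;

    	while (kfd_topology_enum_kfd_devices(i, &dev) == 0) {
    		if (dev && dev->kfd2kgd)
    			return dev;
    		i++;
    	}
    	return NULL;
    }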
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9e4134c5b481..6a48d29ada47 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -33,6 +33,8 @@
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
+#include <linux/seq_file.h>
+#include <linux/kref.h>
#include <kgd_kfd_interface.h>
#include "amd_shared.h"
@@ -41,6 +43,7 @@
#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
#define KFD_MMAP_EVENTS_MASK 0x4000000000000
+#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -63,6 +66,15 @@
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
/*
+ * Size of the per-process TBA+TMA buffer: 2 pages
+ *
+ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for daisy-chaining a user-mode trap handler.
+ */
+#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
+#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+
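
A layout sketch of the two-page buffer these constants describe (assuming 4 KiB pages):

    /*
     *   tba_addr                tba_addr + KFD_CWSR_TMA_OFFSET
     *   |                       |
     *   +---- page 0: TBA ------+---- page 1: TMA -------------+
     *   | CWSR trap handler ISA | user-mode trap handler chain |
     *   +-----------------------+------------------------------+
     *   |<----------- KFD_CWSR_TBA_TMA_SIZE ------------------>|
     */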
+/*
* Kernel module parameter to specify maximum number of supported queues per
* device
*/
@@ -79,11 +91,25 @@ extern int max_num_of_queues_per_device;
extern int sched_policy;
/*
+ * Kernel module parameter to specify the maximum number of
+ * processes per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
+extern int cwsr_enable;
+
+/*
* Kernel module parameter to specify whether to send sigterm to HSA process on
* unhandled exception
*/
extern int send_sigterm;
+/*
+ * Ignore CRAT table during KFD initialization; this can be used to
+ * work around broken CRAT tables on some AMD systems
+ */
+extern int ignore_crat;
+
/**
* enum kfd_sched_policy
*
@@ -131,6 +157,7 @@ struct kfd_device_info {
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
+ bool supports_cwsr;
};
struct kfd_mem_obj {
@@ -200,6 +227,14 @@ struct kfd_dev {
/* Debug manager */
struct kfd_dbgmgr *dbgmgr;
+
+	/* Maximum number of processes mapped to the HW scheduler */
+ unsigned int max_proc_per_quantum;
+
+ /* CWSR */
+ bool cwsr_enabled;
+ const void *cwsr_isa;
+ unsigned int cwsr_isa_size;
};
/* KGD2KFD callbacks */
@@ -332,6 +367,9 @@ struct queue_properties {
uint32_t eop_ring_buffer_size;
uint64_t ctx_save_restore_area_address;
uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
/**
@@ -439,6 +477,11 @@ struct qcm_process_device {
uint32_t num_gws;
uint32_t num_oac;
uint32_t sh_hidden_private_base;
+
+ /* CWSR memory */
+ void *cwsr_kaddr;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
@@ -501,6 +544,9 @@ struct kfd_process {
*/
void *mm;
+ struct kref ref;
+ struct work_struct release_work;
+
struct mutex mutex;
/*
@@ -563,9 +609,10 @@ struct amdkfd_ioctl_desc {
void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+void kfd_unref_process(struct kfd_process *p);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
@@ -577,6 +624,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+ struct vm_area_struct *vma);
+
/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(
struct kfd_process *p);
@@ -624,9 +674,12 @@ int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+ uint32_t proximity_domain);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
@@ -643,8 +696,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd);
int kfd_init_apertures(struct kfd_process *process);
/* Queue Context Management */
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
-
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
@@ -699,6 +750,7 @@ struct packet_manager {
struct mutex lock;
bool allocated;
struct kfd_mem_obj *ib_buffer_obj;
+ unsigned int ib_size_bytes;
};
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
@@ -745,4 +797,23 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+/* Debugfs */
+#if defined(CONFIG_DEBUG_FS)
+
+void kfd_debugfs_init(void);
+void kfd_debugfs_fini(void);
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
+int pqm_debugfs_mqds(struct seq_file *m, void *data);
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
+int dqm_debugfs_hqds(struct seq_file *m, void *data);
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
+int pm_debugfs_runlist(struct seq_file *m, void *data);
+
+#else
+
+static inline void kfd_debugfs_init(void) {}
+static inline void kfd_debugfs_fini(void) {}
+
+#endif
+
#endif
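
Because the !CONFIG_DEBUG_FS branch provides empty inline stubs for init/fini, callers need no ifdefs of their own. A hedged sketch of the intended call site (the real one is in kfd_module.c, outside this hunk):

    static int __init example_kfd_init(void)
    {
    	/* Compiles to nothing when debugfs is disabled */
    	kfd_debugfs_init();
    	return 0;
    }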
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1f5ccd28bd41..a22fb0710f15 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -24,10 +24,12 @@
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
+#include <linux/mman.h>
struct mm_struct;
@@ -46,13 +48,12 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
static struct workqueue_struct *kfd_process_wq;
-struct kfd_process_release_work {
- struct work_struct kfd_work;
- struct kfd_process *p;
-};
-
static struct kfd_process *find_process(const struct task_struct *thread);
-static struct kfd_process *create_process(const struct task_struct *thread);
+static void kfd_process_ref_release(struct kref *ref);
+static struct kfd_process *create_process(const struct task_struct *thread,
+ struct file *filep);
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+
void kfd_process_create_wq(void)
{
@@ -68,9 +69,10 @@ void kfd_process_destroy_wq(void)
}
}
-struct kfd_process *kfd_create_process(const struct task_struct *thread)
+struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
+ struct task_struct *thread = current;
if (!thread->mm)
return ERR_PTR(-EINVAL);
@@ -79,9 +81,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
- /* Take mmap_sem because we call __mmu_notifier_register inside */
- down_write(&thread->mm->mmap_sem);
-
/*
* take kfd processes mutex before starting of process creation
* so there won't be a case where two threads of the same process
@@ -93,14 +92,11 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
process = find_process(thread);
if (process)
pr_debug("Process already found\n");
-
- if (!process)
- process = create_process(thread);
+ else
+ process = create_process(thread, filep);
mutex_unlock(&kfd_processes_mutex);
- up_write(&thread->mm->mmap_sem);
-
return process;
}
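
The dropped mmap_sem handling is explained by the notifier API change further down: the double-underscore variant expects the caller to hold the lock, while the plain one takes it internally. A sketch of the contrast, as the kernel API of this era defines it:

    /* Before: caller-managed locking around the raw variant.
     *
     *	down_write(&mm->mmap_sem);
     *	__mmu_notifier_register(mn, mm);
     *	up_write(&mm->mmap_sem);
     *
     * After: mmu_notifier_register(mn, mm) acquires mmap_sem itself,
     * so kfd_create_process() needs no mmap_sem handling of its own.
     */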
@@ -144,63 +140,75 @@ static struct kfd_process *find_process(const struct task_struct *thread)
return p;
}
-static void kfd_process_wq_release(struct work_struct *work)
+void kfd_unref_process(struct kfd_process *p)
+{
+ kref_put(&p->ref, kfd_process_ref_release);
+}
+
+static void kfd_process_destroy_pdds(struct kfd_process *p)
{
- struct kfd_process_release_work *my_work;
struct kfd_process_device *pdd, *temp;
- struct kfd_process *p;
- my_work = (struct kfd_process_release_work *) work;
+ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+ per_device_list) {
+ pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+ pdd->dev->id, p->pasid);
- p = my_work->p;
+ list_del(&pdd->per_device_list);
- pr_debug("Releasing process (pasid %d) in workqueue\n",
- p->pasid);
+ if (pdd->qpd.cwsr_kaddr)
+ free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
+ get_order(KFD_CWSR_TBA_TMA_SIZE));
- mutex_lock(&p->mutex);
+ kfree(pdd);
+ }
+}
- list_for_each_entry_safe(pdd, temp, &p->per_device_data,
- per_device_list) {
- pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
- pdd->dev->id, p->pasid);
+/* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more; otherwise we couldn't safely free the process
+ * structure in the end.
+ */
+static void kfd_process_wq_release(struct work_struct *work)
+{
+ struct kfd_process *p = container_of(work, struct kfd_process,
+ release_work);
+ struct kfd_process_device *pdd;
+
+ pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
-
- list_del(&pdd->per_device_list);
- kfree(pdd);
}
+ kfd_process_destroy_pdds(p);
+
kfd_event_free_process(p);
kfd_pasid_free(p->pasid);
kfd_free_process_doorbells(p);
- mutex_unlock(&p->mutex);
-
mutex_destroy(&p->mutex);
- kfree(p);
+ put_task_struct(p->lead_thread);
- kfree(work);
+ kfree(p);
}
-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+static void kfd_process_ref_release(struct kref *ref)
{
- struct kfd_process_release_work *work;
- struct kfd_process *p;
+ struct kfd_process *p = container_of(ref, struct kfd_process, ref);
- p = container_of(rcu, struct kfd_process, rcu);
-
- mmdrop(p->mm);
+ INIT_WORK(&p->release_work, kfd_process_wq_release);
+ queue_work(kfd_process_wq, &p->release_work);
+}
- work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
+static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+{
+ struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
- if (work) {
- INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
- work->p = p;
- queue_work(kfd_process_wq, (struct work_struct *) work);
- }
+ kfd_unref_process(p);
}
static void kfd_process_notifier_release(struct mmu_notifier *mn,
@@ -244,15 +252,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
+ /* Indicate to other users that MM is no longer valid */
+ p->mm = NULL;
+
mutex_unlock(&p->mutex);
- /*
- * Because we drop mm_count inside kfd_process_destroy_delayed
- * and because the mmu_notifier_unregister function also drop
- * mm_count we need to take an extra count here.
- */
- mmgrab(p->mm);
- mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
+ mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}
@@ -260,7 +265,44 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
};
-static struct kfd_process *create_process(const struct task_struct *thread)
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+{
+ unsigned long offset;
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_dev *dev = NULL;
+ struct qcm_process_device *qpd = NULL;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ dev = pdd->dev;
+ qpd = &pdd->qpd;
+ if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+ continue;
+ offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+ qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+ MAP_SHARED, offset);
+
+ if (IS_ERR_VALUE(qpd->tba_addr)) {
+ int err = qpd->tba_addr;
+
+ pr_err("Failure to set tba address. error %d.\n", err);
+ qpd->tba_addr = 0;
+ qpd->cwsr_kaddr = NULL;
+ return err;
+ }
+
+ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+ }
+
+ return 0;
+}
+
+static struct kfd_process *create_process(const struct task_struct *thread,
+ struct file *filep)
{
struct kfd_process *process;
int err = -ENOMEM;
@@ -277,13 +319,15 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (kfd_alloc_process_doorbells(process) < 0)
goto err_alloc_doorbells;
+ kref_init(&process->ref);
+
mutex_init(&process->mutex);
process->mm = thread->mm;
/* register notifier */
process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
- err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
+ err = mmu_notifier_register(&process->mmu_notifier, process->mm);
if (err)
goto err_mmu_notifier;
@@ -291,6 +335,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
(uintptr_t)process->mm);
process->lead_thread = thread->group_leader;
+ get_task_struct(process->lead_thread);
INIT_LIST_HEAD(&process->per_device_data);
@@ -306,8 +351,14 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
+ err = kfd_process_init_cwsr(process, filep);
+ if (err)
+ goto err_init_cwsr;
+
return process;
+err_init_cwsr:
+ kfd_process_destroy_pdds(process);
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
@@ -343,16 +394,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *pdd = NULL;
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
- if (pdd != NULL) {
- pdd->dev = dev;
- INIT_LIST_HEAD(&pdd->qpd.queues_list);
- INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
- pdd->qpd.dqm = dev->dqm;
- pdd->process = p;
- pdd->bound = PDD_UNBOUND;
- pdd->already_dequeued = false;
- list_add(&pdd->per_device_list, &p->per_device_data);
- }
+ if (!pdd)
+ return NULL;
+
+ pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+ pdd->qpd.dqm = dev->dqm;
+ pdd->qpd.pqm = &p->pqm;
+ pdd->process = p;
+ pdd->bound = PDD_UNBOUND;
+ pdd->already_dequeued = false;
+ list_add(&pdd->per_device_list, &p->per_device_data);
return pdd;
}
@@ -483,6 +536,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
mutex_unlock(kfd_get_dbgmgr_mutex());
+ mutex_lock(&p->mutex);
+
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
@@ -491,6 +546,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
+
+ kfd_unref_process(p);
}
struct kfd_process_device *kfd_get_first_process_device_data(
@@ -515,22 +572,86 @@ bool kfd_has_process_device_data(struct kfd_process *p)
return !(list_empty(&p->per_device_data));
}
-/* This returns with process->mutex locked. */
+/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
- struct kfd_process *p;
+ struct kfd_process *p, *ret_p = NULL;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (p->pasid == pasid) {
- mutex_lock(&p->mutex);
+ kref_get(&p->ref);
+ ret_p = p;
break;
}
}
srcu_read_unlock(&kfd_processes_srcu, idx);
- return p;
+ return ret_p;
}
+
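
Since the lookup now takes a kref instead of returning with the process mutex held, every caller must pair it with kfd_unref_process(). A sketch of the expected pattern (kfd_process_iommu_unbind_callback() above follows it):

    static void example_use_process(unsigned int pasid)
    {
    	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

    	if (!p)
    		return;

    	mutex_lock(&p->mutex);
    	/* ... operate on the process ... */
    	mutex_unlock(&p->mutex);

    	kfd_unref_process(p);	/* drop the reference the lookup took */
    }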
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+ struct vm_area_struct *vma)
+{
+ struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+ struct kfd_process_device *pdd;
+ struct qcm_process_device *qpd;
+
+ if (!dev)
+ return -EINVAL;
+ if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+ pr_err("Incorrect CWSR mapping size.\n");
+ return -EINVAL;
+ }
+
+ pdd = kfd_get_process_device_data(dev, process);
+ if (!pdd)
+ return -EINVAL;
+ qpd = &pdd->qpd;
+
+ qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(KFD_CWSR_TBA_TMA_SIZE));
+ if (!qpd->cwsr_kaddr) {
+ pr_err("Error allocating per process CWSR buffer.\n");
+ return -ENOMEM;
+ }
+
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+ | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+ /* Mapping pages to user process */
+ return remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(qpd->cwsr_kaddr)),
+ KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+}
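
This handler is the other half of the vm_mmap() call in kfd_process_init_cwsr(): the kernel-initiated mmap re-enters the driver's file mmap entry point, which dispatches here on the reserved-memory mask bit. A sketch of the assumed round trip (the dispatcher itself is outside this section):

    /* Assumed flow, on a 64-bit kernel:
     *
     *	offset   = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
     *	vm_pgoff = offset >> PAGE_SHIFT
     *	         = dev->id | KFD_MMAP_RESERVED_MEM_MASK;
     *
     * kfd_device_by_id() takes a uint32_t, so passing vm_pgoff truncates
     * away the mask (bit 49) and recovers the gpu_id. The handler then
     * allocates zeroed kernel pages (cwsr_kaddr, later the target of the
     * CWSR ISA memcpy) and maps those same pages into the process with
     * remap_pfn_range().
     */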
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
+{
+ struct kfd_process *p;
+ unsigned int temp;
+ int r = 0;
+
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ seq_printf(m, "Process %d PASID %d:\n",
+ p->lead_thread->tgid, p->pasid);
+
+ mutex_lock(&p->mutex);
+ r = pqm_debugfs_mqds(m, &p->pqm);
+ mutex_unlock(&p->mutex);
+
+ if (r)
+ break;
+ }
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+
+ return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index a3f1e62c60ba..876380632668 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return retval;
if (list_empty(&pdd->qpd.queues_list) &&
- list_empty(&pdd->qpd.priv_queue_list)) {
- pdd->qpd.pqm = pqm;
+ list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
- }
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
@@ -203,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
- &q->properties.vmid);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
@@ -224,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
- &q->properties.vmid);
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
@@ -315,6 +311,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (pqn->q) {
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+ if (retval) {
+ pr_debug("Destroy queue failed, returned %d\n", retval);
+ goto err_destroy_queue;
+ }
uninit_queue(pqn->q);
}
@@ -326,6 +326,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
list_empty(&pdd->qpd.priv_queue_list))
dqm->ops.unregister_process(dqm, &pdd->qpd);
+err_destroy_queue:
return retval;
}
@@ -367,4 +368,67 @@ struct kernel_queue *pqm_get_kernel_queue(
return NULL;
}
+#if defined(CONFIG_DEBUG_FS)
+
+int pqm_debugfs_mqds(struct seq_file *m, void *data)
+{
+ struct process_queue_manager *pqm = data;
+ struct process_queue_node *pqn;
+ struct queue *q;
+ enum KFD_MQD_TYPE mqd_type;
+ struct mqd_manager *mqd_manager;
+ int r = 0;
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ if (pqn->q) {
+ q = pqn->q;
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_SDMA:
+ seq_printf(m, " SDMA queue on device %x\n",
+ q->device->id);
+ mqd_type = KFD_MQD_TYPE_SDMA;
+ break;
+ case KFD_QUEUE_TYPE_COMPUTE:
+ seq_printf(m, " Compute queue on device %x\n",
+ q->device->id);
+ mqd_type = KFD_MQD_TYPE_CP;
+ break;
+ default:
+ seq_printf(m,
+ " Bad user queue type %d on device %x\n",
+ q->properties.type, q->device->id);
+ continue;
+ }
+ mqd_manager = q->device->dqm->ops.get_mqd_manager(
+ q->device->dqm, mqd_type);
+ } else if (pqn->kq) {
+ q = pqn->kq->queue;
+ mqd_manager = pqn->kq->mqd;
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_DIQ:
+ seq_printf(m, " DIQ on device %x\n",
+ pqn->kq->dev->id);
+ mqd_type = KFD_MQD_TYPE_HIQ;
+ break;
+ default:
+ seq_printf(m,
+ " Bad kernel queue type %d on device %x\n",
+ q->properties.type,
+ pqn->kq->dev->id);
+ continue;
+ }
+ } else {
+ seq_printf(m,
+ " Weird: Queue node with neither kernel nor user queue\n");
+ continue;
+ }
+
+ r = mqd_manager->debugfs_show_mqd(m, q->mqd);
+ if (r != 0)
+ break;
+ }
+
+ return r;
+}
+#endif
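
Putting the pieces together, the per-process debugfs output produced by the two functions above has roughly this shape (all values invented):

    /* Illustrative output:
     *
     *	Process 1234 PASID 32768:
     *	 Compute queue on device 2a0f
     *	  00000000: ... MQD hex dump via debugfs_show_mqd() ...
     *	 SDMA queue on device 2a0f
     *	  00000000: ... dump via debugfs_show_mqd_sdma() ...
     */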
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 19ce59028d6b..c6a76090a725 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -28,27 +28,32 @@
#include <linux/hash.h>
#include <linux/cpufreq.h>
#include <linux/log2.h>
+#include <linux/dmi.h>
+#include <linux/atomic.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
#include "kfd_topology.h"
+#include "kfd_device_queue_manager.h"
+/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
-static int topology_crat_parsed;
static struct kfd_system_properties sys_props;
static DECLARE_RWSEM(topology_lock);
+static atomic_t topology_crat_proximity_domain;
-struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+ uint32_t proximity_domain)
{
struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
+ struct kfd_topology_device *device = NULL;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu_id == gpu_id) {
- device = top_dev->gpu;
+ if (top_dev->proximity_domain == proximity_domain) {
+ device = top_dev;
break;
}
@@ -57,7 +62,7 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
return device;
}
-struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
+struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
{
struct kfd_topology_device *top_dev;
struct kfd_dev *device = NULL;
@@ -65,7 +70,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu->pdev == pdev) {
+ if (top_dev->gpu_id == gpu_id) {
device = top_dev->gpu;
break;
}
@@ -75,282 +80,31 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
return device;
}
-static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
-{
- struct acpi_table_header *crat_table;
- acpi_status status;
-
- if (!size)
- return -EINVAL;
-
- /*
- * Fetch the CRAT table from ACPI
- */
- status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
- if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
- return -ENODATA;
- } else if (ACPI_FAILURE(status)) {
- const char *err = acpi_format_exception(status);
-
- pr_err("CRAT table error: %s\n", err);
- return -EINVAL;
- }
-
- if (*size >= crat_table->length && crat_image != NULL)
- memcpy(crat_image, crat_table, crat_table->length);
-
- *size = crat_table->length;
-
- return 0;
-}
-
-static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
- struct crat_subtype_computeunit *cu)
-{
- dev->node_props.cpu_cores_count = cu->num_cpu_cores;
- dev->node_props.cpu_core_id_base = cu->processor_id_low;
- if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
-
- pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
- cu->processor_id_low);
-}
-
-static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
- struct crat_subtype_computeunit *cu)
-{
- dev->node_props.simd_id_base = cu->processor_id_low;
- dev->node_props.simd_count = cu->num_simd_cores;
- dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
- dev->node_props.max_waves_per_simd = cu->max_waves_simd;
- dev->node_props.wave_front_size = cu->wave_front_size;
- dev->node_props.mem_banks_count = cu->num_banks;
- dev->node_props.array_count = cu->num_arrays;
- dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
- dev->node_props.simd_per_cu = cu->num_simd_per_cu;
- dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
- if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
- dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
- pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores,
- cu->processor_id_low);
-}
-
-/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */
-static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
-{
- struct kfd_topology_device *dev;
- int i = 0;
-
- pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
- cu->proximity_domain, cu->hsa_capability);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (cu->proximity_domain == i) {
- if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
- kfd_populated_cu_info_cpu(dev, cu);
-
- if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
- kfd_populated_cu_info_gpu(dev, cu);
- break;
- }
- i++;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_mem is called when the topology mutex is
- * already acquired
- */
-static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
-{
- struct kfd_mem_properties *props;
- struct kfd_topology_device *dev;
- int i = 0;
-
- pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
- mem->promixity_domain);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (mem->promixity_domain == i) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- if (dev->node_props.cpu_cores_count == 0)
- props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
- else
- props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
-
- if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
- props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
- if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
- props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
-
- props->size_in_bytes =
- ((uint64_t)mem->length_high << 32) +
- mem->length_low;
- props->width = mem->width;
-
- dev->mem_bank_count++;
- list_add_tail(&props->list, &dev->mem_props);
-
- break;
- }
- i++;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_cache is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
-{
- struct kfd_cache_properties *props;
- struct kfd_topology_device *dev;
- uint32_t id;
-
- id = cache->processor_id_low;
-
- pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
- list_for_each_entry(dev, &topology_device_list, list)
- if (id == dev->node_props.cpu_core_id_base ||
- id == dev->node_props.simd_id_base) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- props->processor_id_low = id;
- props->cache_level = cache->cache_level;
- props->cache_size = cache->cache_size;
- props->cacheline_size = cache->cache_line_size;
- props->cachelines_per_tag = cache->lines_per_tag;
- props->cache_assoc = cache->associativity;
- props->cache_latency = cache->cache_latency;
-
- if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_DATA;
- if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
- if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_CPU;
- if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
- props->cache_type |= HSA_CACHE_TYPE_HSACU;
-
- dev->cache_count++;
- dev->node_props.caches_count++;
- list_add_tail(&props->list, &dev->cache_props);
-
- break;
- }
-
- return 0;
-}
-
-/*
- * kfd_parse_subtype_iolink is called when the topology mutex
- * is already acquired
- */
-static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
+struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
{
- struct kfd_iolink_properties *props;
- struct kfd_topology_device *dev;
- uint32_t i = 0;
- uint32_t id_from;
- uint32_t id_to;
-
- id_from = iolink->proximity_domain_from;
- id_to = iolink->proximity_domain_to;
+ struct kfd_topology_device *top_dev;
+ struct kfd_dev *device = NULL;
- pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from);
- list_for_each_entry(dev, &topology_device_list, list) {
- if (id_from == i) {
- props = kfd_alloc_struct(props);
- if (props == NULL)
- return -ENOMEM;
-
- props->node_from = id_from;
- props->node_to = id_to;
- props->ver_maj = iolink->version_major;
- props->ver_min = iolink->version_minor;
-
- /*
- * weight factor (derived from CDIR), currently always 1
- */
- props->weight = 1;
-
- props->min_latency = iolink->minimum_latency;
- props->max_latency = iolink->maximum_latency;
- props->min_bandwidth = iolink->minimum_bandwidth_mbs;
- props->max_bandwidth = iolink->maximum_bandwidth_mbs;
- props->rec_transfer_size =
- iolink->recommended_transfer_size;
-
- dev->io_link_count++;
- dev->node_props.io_links_count++;
- list_add_tail(&props->list, &dev->io_link_props);
+ down_read(&topology_lock);
+ list_for_each_entry(top_dev, &topology_device_list, list)
+ if (top_dev->gpu->pdev == pdev) {
+ device = top_dev->gpu;
break;
}
- i++;
- }
- return 0;
-}
-
-static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
-{
- struct crat_subtype_computeunit *cu;
- struct crat_subtype_memory *mem;
- struct crat_subtype_cache *cache;
- struct crat_subtype_iolink *iolink;
- int ret = 0;
-
- switch (sub_type_hdr->type) {
- case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
- cu = (struct crat_subtype_computeunit *)sub_type_hdr;
- ret = kfd_parse_subtype_cu(cu);
- break;
- case CRAT_SUBTYPE_MEMORY_AFFINITY:
- mem = (struct crat_subtype_memory *)sub_type_hdr;
- ret = kfd_parse_subtype_mem(mem);
- break;
- case CRAT_SUBTYPE_CACHE_AFFINITY:
- cache = (struct crat_subtype_cache *)sub_type_hdr;
- ret = kfd_parse_subtype_cache(cache);
- break;
- case CRAT_SUBTYPE_TLB_AFFINITY:
- /*
- * For now, nothing to do here
- */
- pr_info("Found TLB entry in CRAT table (not processing)\n");
- break;
- case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
- /*
- * For now, nothing to do here
- */
- pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n");
- break;
- case CRAT_SUBTYPE_IOLINK_AFFINITY:
- iolink = (struct crat_subtype_iolink *)sub_type_hdr;
- ret = kfd_parse_subtype_iolink(iolink);
- break;
- default:
- pr_warn("Unknown subtype (%d) in CRAT\n",
- sub_type_hdr->type);
- }
+ up_read(&topology_lock);
- return ret;
+ return device;
}
+/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
struct kfd_mem_properties *mem;
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
+ struct kfd_perf_properties *perf;
list_del(&dev->list);
@@ -375,25 +129,35 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
kfree(iolink);
}
- kfree(dev);
+ while (dev->perf_props.next != &dev->perf_props) {
+ perf = container_of(dev->perf_props.next,
+ struct kfd_perf_properties, list);
+ list_del(&perf->list);
+ kfree(perf);
+ }
- sys_props.num_devices--;
+ kfree(dev);
}
-static void kfd_release_live_view(void)
+void kfd_release_topology_device_list(struct list_head *device_list)
{
struct kfd_topology_device *dev;
- while (topology_device_list.next != &topology_device_list) {
- dev = container_of(topology_device_list.next,
- struct kfd_topology_device, list);
+ while (!list_empty(device_list)) {
+ dev = list_first_entry(device_list,
+ struct kfd_topology_device, list);
kfd_release_topology_device(dev);
+ }
}
+static void kfd_release_live_view(void)
+{
+ kfd_release_topology_device_list(&topology_device_list);
memset(&sys_props, 0, sizeof(sys_props));
}
-static struct kfd_topology_device *kfd_create_topology_device(void)
+struct kfd_topology_device *kfd_create_topology_device(
+ struct list_head *device_list)
{
struct kfd_topology_device *dev;
@@ -406,65 +170,13 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
INIT_LIST_HEAD(&dev->mem_props);
INIT_LIST_HEAD(&dev->cache_props);
INIT_LIST_HEAD(&dev->io_link_props);
+ INIT_LIST_HEAD(&dev->perf_props);
- list_add_tail(&dev->list, &topology_device_list);
- sys_props.num_devices++;
+ list_add_tail(&dev->list, device_list);
return dev;
}
-static int kfd_parse_crat_table(void *crat_image)
-{
- struct kfd_topology_device *top_dev;
- struct crat_subtype_generic *sub_type_hdr;
- uint16_t node_id;
- int ret;
- struct crat_header *crat_table = (struct crat_header *)crat_image;
- uint16_t num_nodes;
- uint32_t image_len;
-
- if (!crat_image)
- return -EINVAL;
-
- num_nodes = crat_table->num_domains;
- image_len = crat_table->length;
-
- pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
-
- for (node_id = 0; node_id < num_nodes; node_id++) {
- top_dev = kfd_create_topology_device();
- if (!top_dev) {
- kfd_release_live_view();
- return -ENOMEM;
- }
- }
-
- sys_props.platform_id =
- (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK;
- sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id);
- sys_props.platform_rev = crat_table->revision;
-
- sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
- while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
- ((char *)crat_image) + image_len) {
- if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
- ret = kfd_parse_subtype(sub_type_hdr);
- if (ret != 0) {
- kfd_release_live_view();
- return ret;
- }
- }
-
- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
- sub_type_hdr->length);
- }
-
- sys_props.generation_count++;
- topology_crat_parsed = 1;
-
- return 0;
-}
-
#define sysfs_show_gen_prop(buffer, fmt, ...) \
snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
@@ -501,11 +213,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
return ret;
}
+static void kfd_topology_kobj_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
static const struct sysfs_ops sysprops_ops = {
.show = sysprops_show,
};
static struct kobj_type sysprops_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &sysprops_ops,
};
@@ -541,6 +259,7 @@ static const struct sysfs_ops iolink_ops = {
};
static struct kobj_type iolink_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &iolink_ops,
};
@@ -568,6 +287,7 @@ static const struct sysfs_ops mem_ops = {
};
static struct kobj_type mem_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &mem_ops,
};
@@ -575,7 +295,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
ssize_t ret;
- uint32_t i;
+ uint32_t i, j;
struct kfd_cache_properties *cache;
/* Making sure that the buffer is an empty string */
@@ -593,12 +313,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
- for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++)
- ret = snprintf(buffer, PAGE_SIZE, "%s%d%s",
- buffer, cache->sibling_map[i],
- (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ?
- "\n" : ",");
-
+ for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
+ /* Check each bit */
+ if (cache->sibling_map[i] & (1 << j))
+ ret = snprintf(buffer, PAGE_SIZE,
+ "%s%d%s", buffer, 1, ",");
+ else
+ ret = snprintf(buffer, PAGE_SIZE,
+ "%s%d%s", buffer, 0, ",");
+ }
+ /* Replace the last "," with end of line */
+ *(buffer + strlen(buffer) - 1) = 0xA;
return ret;
}
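
The sibling map is now expanded bit by bit from the CRAT byte array instead of printing a fixed number of CPU siblings. A worked example with an invented map:

    /* Worked example: sibling_map[0] = 0x03, all other bytes zero.
     * Bits are tested LSB first, so the emitted string starts
     *	"1,1,0,0,0,0,0,0,0,..."
     * and the trailing ',' is overwritten with '\n' (0x0A) at the end.
     */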
@@ -607,9 +333,43 @@ static const struct sysfs_ops cache_ops = {
};
static struct kobj_type cache_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &cache_ops,
};
+/****** Sysfs of Performance Counters ******/
+
+struct kfd_perf_attr {
+ struct kobj_attribute attr;
+ uint32_t data;
+};
+
+static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
+ char *buf)
+{
+ struct kfd_perf_attr *attr;
+
+ buf[0] = 0;
+ attr = container_of(attrs, struct kfd_perf_attr, attr);
+ if (!attr->data) /* invalid data for PMC */
+ return 0;
+ else
+ return sysfs_show_32bit_val(buf, attr->data);
+}
+
+#define KFD_PERF_DESC(_name, _data) \
+{ \
+ .attr = __ATTR(_name, 0444, perf_show, NULL), \
+ .data = _data, \
+}
+
+static struct kfd_perf_attr perf_attr_iommu[] = {
+ KFD_PERF_DESC(max_concurrent, 0),
+ KFD_PERF_DESC(num_counters, 0),
+ KFD_PERF_DESC(counter_ids, 0),
+};
+/****************************************/
+
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -646,18 +406,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, "simd_count",
dev->node_props.simd_count);
-
- if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
- pr_info_once("mem_banks_count truncated from %d to %d\n",
- dev->node_props.mem_banks_count,
- dev->mem_bank_count);
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->mem_bank_count);
- } else {
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->node_props.mem_banks_count);
- }
-
+ sysfs_show_32bit_prop(buffer, "mem_banks_count",
+ dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, "caches_count",
dev->node_props.caches_count);
sysfs_show_32bit_prop(buffer, "io_links_count",
@@ -705,9 +455,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
+ if (dev->gpu->device_info->asic_family == CHIP_TONGA)
+ dev->node_props.capability |=
+ HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+
sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
- dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
- dev->gpu->kgd));
+ dev->node_props.max_engine_clk_fcompute);
sysfs_show_64bit_prop(buffer, "local_mem_size",
(unsigned long long int) 0);
@@ -729,6 +482,7 @@ static const struct sysfs_ops node_ops = {
};
static struct kobj_type node_type = {
+ .release = kfd_topology_kobj_release,
.sysfs_ops = &node_ops,
};
@@ -744,6 +498,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
+ struct kfd_perf_properties *perf;
if (dev->kobj_iolink) {
list_for_each_entry(iolink, &dev->io_link_props, list)
@@ -780,6 +535,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
dev->kobj_mem = NULL;
}
+ if (dev->kobj_perf) {
+ list_for_each_entry(perf, &dev->perf_props, list) {
+ kfree(perf->attr_group);
+ perf->attr_group = NULL;
+ }
+ kobject_del(dev->kobj_perf);
+ kobject_put(dev->kobj_perf);
+ dev->kobj_perf = NULL;
+ }
+
if (dev->kobj_node) {
sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
sysfs_remove_file(dev->kobj_node, &dev->attr_name);
@@ -796,8 +561,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
struct kfd_iolink_properties *iolink;
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
+ struct kfd_perf_properties *perf;
int ret;
- uint32_t i;
+ uint32_t i, num_attrs;
+ struct attribute **attrs;
if (WARN_ON(dev->kobj_node))
return -EEXIST;
@@ -826,6 +593,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (!dev->kobj_iolink)
return -ENOMEM;
+ dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
+ if (!dev->kobj_perf)
+ return -ENOMEM;
+
/*
* Creating sysfs files for node properties
*/
@@ -903,11 +674,38 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
if (ret < 0)
return ret;
i++;
-}
+ }
+
+ /* All hardware blocks have the same number of attributes. */
+ num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
+ list_for_each_entry(perf, &dev->perf_props, list) {
+ perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
+ * num_attrs + sizeof(struct attribute_group),
+ GFP_KERNEL);
+ if (!perf->attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(perf->attr_group + 1);
+ if (!strcmp(perf->block_name, "iommu")) {
+			/* The IOMMU's num_counters and counter_ids are already
+			 * exposed under /sys/bus/event_source/devices/amd_iommu,
+			 * so we don't duplicate them here.
+ */
+ perf_attr_iommu[0].data = perf->max_concurrent;
+ for (i = 0; i < num_attrs; i++)
+ attrs[i] = &perf_attr_iommu[i].attr.attr;
+ }
+ perf->attr_group->name = perf->block_name;
+ perf->attr_group->attrs = attrs;
+ ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
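
Two details of the perf group construction above are easy to miss: the attribute_group and its pointer array come from a single zeroed allocation (the pointers live just past the struct, and the zeroed slack doubles as the NULL terminator sysfs requires), and the resulting files land under the node's perf directory. An illustrative layout, with the sysfs path assumed:

    /* Assumed resulting layout (node number illustrative):
     *
     *	.../topology/nodes/0/perf/iommu/max_concurrent
     *	.../topology/nodes/0/perf/iommu/num_counters  (data 0: reads
     *	.../topology/nodes/0/perf/iommu/counter_ids    as empty, see
     *	                                               amd_iommu above)
     */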
+/* Called with write topology lock acquired */
static int kfd_build_sysfs_node_tree(void)
{
struct kfd_topology_device *dev;
@@ -924,6 +722,7 @@ static int kfd_build_sysfs_node_tree(void)
return 0;
}
+/* Called with write topology lock acquired */
static void kfd_remove_sysfs_node_tree(void)
{
struct kfd_topology_device *dev;
@@ -995,75 +794,246 @@ static void kfd_topology_release_sysfs(void)
}
}
+/* Called with write topology_lock acquired */
+static void kfd_topology_update_device_list(struct list_head *temp_list,
+ struct list_head *master_list)
+{
+ while (!list_empty(temp_list)) {
+ list_move_tail(temp_list->next, master_list);
+ sys_props.num_devices++;
+ }
+}
+
+static void kfd_debug_print_topology(void)
+{
+ struct kfd_topology_device *dev;
+
+ down_read(&topology_lock);
+
+ dev = list_last_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ if (dev) {
+ if (dev->node_props.cpu_cores_count &&
+ dev->node_props.simd_count) {
+ pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+ dev->node_props.device_id,
+ dev->node_props.vendor_id);
+ } else if (dev->node_props.cpu_cores_count)
+ pr_info("Topology: Add CPU node\n");
+ else if (dev->node_props.simd_count)
+ pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+ dev->node_props.device_id,
+ dev->node_props.vendor_id);
+ }
+ up_read(&topology_lock);
+}
+
+/* Helper function for initializing platform_xx members of
+ * kfd_system_properties. Uses OEM info from the last CPU/APU node.
+ */
+static void kfd_update_system_properties(void)
+{
+ struct kfd_topology_device *dev;
+
+ down_read(&topology_lock);
+ dev = list_last_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ if (dev) {
+ sys_props.platform_id =
+ (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+ sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
+ sys_props.platform_rev = dev->oem_revision;
+ }
+ up_read(&topology_lock);
+}
+
+static void find_system_memory(const struct dmi_header *dm,
+ void *private)
+{
+ struct kfd_mem_properties *mem;
+ u16 mem_width, mem_clock;
+ struct kfd_topology_device *kdev =
+ (struct kfd_topology_device *)private;
+ const u8 *dmi_data = (const u8 *)(dm + 1);
+
+ if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
+ mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
+ mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (mem_width != 0xFFFF && mem_width != 0)
+ mem->width = mem_width;
+ if (mem_clock != 0)
+ mem->mem_clk_max = mem_clock;
+ }
+ }
+}
+
+/*
+ * Performance counter information is not part of CRAT, but we would
+ * like to expose it in sysfs under the topology directory for the
+ * Thunk to consume.
+ * This function is called before updating the sysfs.
+ */
+static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
+{
+ struct kfd_perf_properties *props;
+
+ if (amd_iommu_pc_supported()) {
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+ strcpy(props->block_name, "iommu");
+ props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
+ amd_iommu_pc_get_max_counters(0); /* assume one iommu */
+ list_add_tail(&props->list, &kdev->perf_props);
+ }
+
+ return 0;
+}
+
+/* kfd_add_non_crat_information - Add information that is not currently
+ * defined in CRAT but is necessary for KFD topology
+ * @kdev - topology device to which additional info is added
+ */
+static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
+{
+ /* Check if CPU only node. */
+ if (!kdev->gpu) {
+ /* Add system memory information */
+ dmi_walk(find_system_memory, kdev);
+ }
+ /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
+}
+
+/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
+ * Ignore CRAT for all other devices. AMD APU is identified if both CPU
+ * and GPU cores are present.
+ * @device_list - topology device list created by parsing ACPI CRAT table.
+ * @return - TRUE if invalid, FALSE if valid.
+ */
+static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
+{
+ struct kfd_topology_device *dev;
+
+ list_for_each_entry(dev, device_list, list) {
+ if (dev->node_props.cpu_cores_count &&
+ dev->node_props.simd_count)
+ return false;
+ }
+ pr_info("Ignoring ACPI CRAT on non-APU system\n");
+ return true;
+}
+
int kfd_topology_init(void)
{
void *crat_image = NULL;
size_t image_size = 0;
int ret;
-
- /*
- * Initialize the head for the topology device list
+ struct list_head temp_topology_device_list;
+ int cpu_only_node = 0;
+ struct kfd_topology_device *kdev;
+ int proximity_domain;
+
+ /* topology_device_list - Master list of all topology devices
+ * temp_topology_device_list - temporary list created while parsing CRAT
+	 * or VCRAT. Once parsing is complete, the contents of the list are
+	 * moved to topology_device_list.
*/
+
+	/* Initialize the heads of both lists */
INIT_LIST_HEAD(&topology_device_list);
+ INIT_LIST_HEAD(&temp_topology_device_list);
init_rwsem(&topology_lock);
- topology_crat_parsed = 0;
memset(&sys_props, 0, sizeof(sys_props));
+ /* Proximity domains in ACPI CRAT tables start counting at
+ * 0. The same should be true for virtual CRAT tables created
+ * at this stage. GPUs added later in kfd_topology_add_device
+ * use a counter.
+ */
+ proximity_domain = 0;
+
/*
- * Get the CRAT image from the ACPI
+	 * Get the CRAT image from ACPI. If ACPI doesn't have one, or if
+	 * the ACPI CRAT is invalid, create a virtual CRAT.
+	 * NOTE: The current implementation expects all AMD APUs to have a
+	 * CRAT. If no CRAT is available, the node is assumed to be a CPU.
*/
- ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
- if (ret == 0 && image_size > 0) {
- pr_info("Found CRAT image with size=%zd\n", image_size);
- crat_image = kmalloc(image_size, GFP_KERNEL);
- if (!crat_image) {
- ret = -ENOMEM;
- pr_err("No memory for allocating CRAT image\n");
- goto err;
+ ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
+ if (!ret) {
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret ||
+ kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
+ kfd_release_topology_device_list(
+ &temp_topology_device_list);
+ kfd_destroy_crat_image(crat_image);
+ crat_image = NULL;
}
- ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
-
- if (ret == 0) {
- down_write(&topology_lock);
- ret = kfd_parse_crat_table(crat_image);
- if (ret == 0)
- ret = kfd_topology_update_sysfs();
- up_write(&topology_lock);
- } else {
- pr_err("Couldn't get CRAT table size from ACPI\n");
+ }
+
+ if (!crat_image) {
+ ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_CPU, NULL,
+ proximity_domain);
+ cpu_only_node = 1;
+ if (ret) {
+ pr_err("Error creating VCRAT table for CPU\n");
+ return ret;
}
- kfree(crat_image);
- } else if (ret == -ENODATA) {
- ret = 0;
- } else {
- pr_err("Couldn't get CRAT table size from ACPI\n");
+
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret) {
+ pr_err("Error parsing VCRAT table for CPU\n");
+ goto err;
+ }
+ }
+
+ kdev = list_first_entry(&temp_topology_device_list,
+ struct kfd_topology_device, list);
+ kfd_add_perf_to_topology(kdev);
+
+ down_write(&topology_lock);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
+ atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+ ret = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+ if (!ret) {
+ sys_props.generation_count++;
+ kfd_update_system_properties();
+ kfd_debug_print_topology();
+ pr_info("Finished initializing topology\n");
+ } else
+ pr_err("Failed to update topology in sysfs ret=%d\n", ret);
+
+ /* For nodes with GPU, this information gets added
+ * when GPU is detected (kfd_topology_add_device).
+ */
+ if (cpu_only_node) {
+ /* Add additional information to CPU only node created above */
+ down_write(&topology_lock);
+ kdev = list_first_entry(&topology_device_list,
+ struct kfd_topology_device, list);
+ up_write(&topology_lock);
+ kfd_add_non_crat_information(kdev);
}
err:
- pr_info("Finished initializing topology ret=%d\n", ret);
+ kfd_destroy_crat_image(crat_image);
return ret;
}
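
In summary, kfd_topology_init() now takes one of two paths before publishing the master list. A condensed sketch of the decision flow implemented above:

    /*
     *	kfd_create_crat_image_acpi()
     *	  ok?  -> kfd_parse_crat_table()
     *	          parse error or non-APU CRAT? -> discard image
     *	  no usable image? -> kfd_create_crat_image_virtual(CPU)
     *	                      + kfd_parse_crat_table()
     *	then: move the temp list into topology_device_list, seed the
     *	proximity-domain counter, update sysfs, and (for the CPU-only
     *	node) add DMI memory info via kfd_add_non_crat_information().
     */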
void kfd_topology_shutdown(void)
{
+ down_write(&topology_lock);
kfd_topology_release_sysfs();
kfd_release_live_view();
-}
-
-static void kfd_debug_print_topology(void)
-{
- struct kfd_topology_device *dev;
- uint32_t i = 0;
-
- pr_info("DEBUG PRINT OF TOPOLOGY:");
- list_for_each_entry(dev, &topology_device_list, list) {
- pr_info("Node: %d\n", i);
- pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
- pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
- pr_info("\tSIMD count: %d", dev->node_props.simd_count);
- i++;
- }
+ up_write(&topology_lock);
}
static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
@@ -1072,11 +1042,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
uint32_t buf[7];
uint64_t local_mem_size;
int i;
+ struct kfd_local_mem_info local_mem_info;
if (!gpu)
return 0;
- local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd);
+ gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
+
+ local_mem_size = local_mem_info.local_mem_size_private +
+ local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
buf[1] = gpu->pdev->subsystem_vendor;
@@ -1091,19 +1065,26 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
return hashout;
}
-
+/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
+ * the GPU device is not already present in the topology device
+ * list then return NULL. This means a new topology device has to
+ * be created for this GPU.
+ * TODO: Rather than assigning @gpu to the first topology device
+ * without a GPU attached, it would be better to have a more
+ * stringent check.
+ */
static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
{
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
+ down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list)
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
dev->gpu = gpu;
out_dev = dev;
break;
}
-
+ up_write(&topology_lock);
return out_dev;
}
@@ -1115,84 +1096,196 @@ static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
*/
}
+/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
+ * patch this after CRAT parsing.
+ */
+static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
+{
+ struct kfd_mem_properties *mem;
+ struct kfd_local_mem_info local_mem_info;
+
+ if (!dev)
+ return;
+
+ /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
+ * single bank of VRAM local memory.
+	 * For dGPUs - VCRAT reports only one bank of local memory.
+	 * For APUs - if CRAT from ACPI reports more than one bank, then
+ * all the banks will report the same mem_clk_max information
+ */
+ dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
+ &local_mem_info);
+
+ list_for_each_entry(mem, &dev->mem_props, list)
+ mem->mem_clk_max = local_mem_info.mem_clk_max;
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+ struct kfd_iolink_properties *link;
+
+ if (!dev || !dev->gpu)
+ return;
+
+	/* GPU only creates direct links, so apply the flag settings to all */
+ if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
+ list_for_each_entry(link, &dev->io_link_props, list)
+ link->flags = CRAT_IOLINK_FLAGS_ENABLED |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+}
+
int kfd_topology_add_device(struct kfd_dev *gpu)
{
uint32_t gpu_id;
struct kfd_topology_device *dev;
- int res;
+ struct kfd_cu_info cu_info;
+ int res = 0;
+ struct list_head temp_topology_device_list;
+ void *crat_image = NULL;
+ size_t image_size = 0;
+ int proximity_domain;
+
+ INIT_LIST_HEAD(&temp_topology_device_list);
gpu_id = kfd_generate_gpu_id(gpu);
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
- down_write(&topology_lock);
- /*
- * Try to assign the GPU to existing topology device (generated from
- * CRAT table
+ proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
+
+ /* Check to see if this gpu device exists in the topology_device_list.
+	 * If so, assign the GPU to that device;
+	 * else create a virtual CRAT for this GPU device and then parse that
+	 * CRAT to create a new topology device. Once created, assign the GPU
+	 * to that topology device.
*/
dev = kfd_assign_gpu(gpu);
if (!dev) {
- pr_info("GPU was not found in the current topology. Extending.\n");
- kfd_debug_print_topology();
- dev = kfd_create_topology_device();
- if (!dev) {
- res = -ENOMEM;
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_GPU, gpu,
+ proximity_domain);
+ if (res) {
+ pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+ return res;
+ }
+ res = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (res) {
+ pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
goto err;
}
- dev->gpu = gpu;
- /*
- * TODO: Make a call to retrieve topology information from the
- * GPU vBIOS
- */
+ down_write(&topology_lock);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
/* Update the SYSFS tree, since we added another topology
* device
*/
- if (kfd_topology_update_sysfs() < 0)
- kfd_topology_release_sysfs();
-
+ res = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+ if (!res)
+ sys_props.generation_count++;
+ else
+ pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+ gpu_id, res);
+ dev = kfd_assign_gpu(gpu);
+ if (WARN_ON(!dev)) {
+ res = -ENODEV;
+ goto err;
+ }
}
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
+
+ /* TODO: Move the following lines to function
+ * kfd_add_non_crat_information
+ */
+
+ /* Fill-in additional information that is not available in CRAT but
+ * needed for the topology
+ */
+
+ dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+ dev->node_props.simd_arrays_per_engine =
+ cu_info.num_shader_arrays_per_engine;
+
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
- dev->node_props.location_id = (gpu->pdev->bus->number << 24) +
- (gpu->pdev->devfn & 0xffffff);
- /*
- * TODO: Retrieve max engine clock values from KGD
- */
+ dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
+ gpu->pdev->devfn);
+ dev->node_props.max_engine_clk_fcompute =
+ dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
+ dev->node_props.max_engine_clk_ccompute =
+ cpufreq_quick_get_max(0) / 1000;
+
+ kfd_fill_mem_clk_max_info(dev);
+ kfd_fill_iolink_non_crat_info(dev);
+
+ switch (dev->gpu->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ case CHIP_TONGA:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ pr_debug("Adding doorbell packet type capability\n");
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dev->gpu->device_info->asic_family);
+ }
+ /* Fix errors in CZ CRAT.
+ * simd_count: Carrizo CRAT reports wrong simd_count, probably
+ * because it doesn't consider masked out CUs
+ * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
+ * capability flag: Carrizo CRAT doesn't report IOMMU flags
+ */
if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
- dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
- pr_info("Adding doorbell packet type capability\n");
+ dev->node_props.simd_count =
+ cu_info.simd_per_cu * cu_info.cu_active_number;
+ dev->node_props.max_waves_per_simd = 10;
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
}
- res = 0;
-
-err:
- up_write(&topology_lock);
+ kfd_debug_print_topology();
- if (res == 0)
+ if (!res)
kfd_notify_gpu_change(gpu_id, 1);
-
+err:
+ kfd_destroy_crat_image(crat_image);
return res;
}
int kfd_topology_remove_device(struct kfd_dev *gpu)
{
- struct kfd_topology_device *dev;
+ struct kfd_topology_device *dev, *tmp;
uint32_t gpu_id;
int res = -ENODEV;
down_write(&topology_lock);
- list_for_each_entry(dev, &topology_device_list, list)
+ list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
if (dev->gpu == gpu) {
gpu_id = dev->gpu_id;
kfd_remove_sysfs_node_entry(dev);
kfd_release_topology_device(dev);
+ sys_props.num_devices--;
res = 0;
if (kfd_topology_update_sysfs() < 0)
kfd_topology_release_sysfs();
@@ -1201,28 +1294,32 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
up_write(&topology_lock);
- if (res == 0)
+ if (!res)
kfd_notify_gpu_change(gpu_id, 0);
return res;
}
-/*
- * When idx is out of bounds, the function will return NULL
+/* kfd_topology_enum_kfd_devices - Enumerate through all devices in the KFD
+ * topology. If a GPU device is found at @idx, then a valid kfd_dev pointer
+ * is returned through @kdev.
+ * Return - 0: On success (@kdev will be NULL for non-GPU nodes)
+ *	   -1: If the end of the list is reached
*/
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
{
struct kfd_topology_device *top_dev;
- struct kfd_dev *device = NULL;
uint8_t device_idx = 0;
+ *kdev = NULL;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list) {
if (device_idx == idx) {
- device = top_dev->gpu;
- break;
+ *kdev = top_dev->gpu;
+ up_read(&topology_lock);
+ return 0;
}
device_idx++;
@@ -1230,6 +1327,88 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
up_read(&topology_lock);
- return device;
+ return -1;
+
+}
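/*
 * Editor's sketch (not part of the patch): with the new return convention
 * a caller walks the whole topology like this, skipping CPU-only nodes for
 * which @kdev comes back NULL:
 */
static void example_walk_kfd_devices(void)
{
	struct kfd_dev *kdev;
	uint8_t id = 0;

	while (kfd_topology_enum_kfd_devices(id, &kdev) == 0) {
		id++;
		if (!kdev)
			continue;	/* non-GPU (CPU-only) node */
		/* ... operate on kdev ... */
	}
}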
+
+static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+{
+ const struct cpuinfo_x86 *cpuinfo;
+ int first_cpu_of_numa_node;
+
+ if (!cpumask || cpumask == cpu_none_mask)
+ return -1;
+ first_cpu_of_numa_node = cpumask_first(cpumask);
+ if (first_cpu_of_numa_node >= nr_cpu_ids)
+ return -1;
+ cpuinfo = &cpu_data(first_cpu_of_numa_node);
+ return cpuinfo->apicid;
}
+
+/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
+ * of the given NUMA node (numa_node_id)
+ * Return -1 on failure
+ */
+int kfd_numa_node_to_apic_id(int numa_node_id)
+{
+ if (numa_node_id == -1) {
+ pr_warn("Invalid NUMA Node. Use online CPU mask\n");
+ return kfd_cpumask_to_apic_id(cpu_online_mask);
+ }
+ return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+{
+ struct kfd_topology_device *dev;
+ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (!dev->gpu) {
+ i++;
+ continue;
+ }
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = dqm_debugfs_hqds(m, dev->gpu->dqm);
+ if (r)
+ break;
+ }
+
+ up_read(&topology_lock);
+
+ return r;
+}
+
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+{
+ struct kfd_topology_device *dev;
+ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (!dev->gpu) {
+ i++;
+ continue;
+ }
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
+ if (r)
+ break;
+ }
+
+ up_read(&topology_lock);
+
+ return r;
+}
+
+#endif
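/*
 * Editor's sketch (assumed wiring; the actual registration lives in
 * kfd_debugfs.c, not shown in this hunk, and the example_* and
 * kfd_debugfs_root names are illustrative): the show() functions above
 * plug into debugfs via the standard single_open() seq_file idiom.
 */
static int example_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, kfd_debugfs_hqds_by_device, NULL);
}

static const struct file_operations example_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = example_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* debugfs_create_file("hqds", 0444, kfd_debugfs_root, NULL,
 *		       &example_debugfs_fops);
 */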
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index c3ddb9b95ff8..53fca1f45401 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -39,8 +39,13 @@
#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080
#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
-#define HSA_CAP_RESERVED 0xfffff000
-#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12
+#define HSA_CAP_RESERVED 0xffffc000
+
+#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
+#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
+#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
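/*
 * Editor's note: the doorbell type is a two-bit field packed into the
 * capability word with the mask/shift pair above, e.g.:
 *
 *	props->capability |= (HSA_CAP_DOORBELL_TYPE_1_0 <<
 *			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
 *			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK;
 *
 *	type = (props->capability & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK) >>
 *			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT;
 */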
struct kfd_node_properties {
uint32_t cpu_cores_count;
@@ -91,8 +96,6 @@ struct kfd_mem_properties {
struct attribute attr;
};
-#define KFD_TOPOLOGY_CPU_SIBLINGS 256
-
#define HSA_CACHE_TYPE_DATA 0x00000001
#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002
#define HSA_CACHE_TYPE_CPU 0x00000004
@@ -109,7 +112,7 @@ struct kfd_cache_properties {
uint32_t cache_assoc;
uint32_t cache_latency;
uint32_t cache_type;
- uint8_t sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS];
+ uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
struct kobject *kobj;
struct attribute attr;
};
@@ -132,24 +135,36 @@ struct kfd_iolink_properties {
struct attribute attr;
};
+struct kfd_perf_properties {
+ struct list_head list;
+ char block_name[16];
+ uint32_t max_concurrent;
+ struct attribute_group *attr_group;
+};
+
struct kfd_topology_device {
struct list_head list;
uint32_t gpu_id;
+ uint32_t proximity_domain;
struct kfd_node_properties node_props;
- uint32_t mem_bank_count;
struct list_head mem_props;
uint32_t cache_count;
struct list_head cache_props;
uint32_t io_link_count;
struct list_head io_link_props;
+ struct list_head perf_props;
struct kfd_dev *gpu;
struct kobject *kobj_node;
struct kobject *kobj_mem;
struct kobject *kobj_cache;
struct kobject *kobj_iolink;
+ struct kobject *kobj_perf;
struct attribute attr_gpuid;
struct attribute attr_name;
struct attribute attr_props;
+ uint8_t oem_id[CRAT_OEMID_LENGTH];
+ uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
+ uint32_t oem_revision;
};
struct kfd_system_properties {
@@ -164,6 +179,12 @@ struct kfd_system_properties {
struct attribute attr_props;
};
+struct kfd_topology_device *kfd_create_topology_device(
+ struct list_head *device_list);
+void kfd_release_topology_device_list(struct list_head *device_list);
+extern bool amd_iommu_pc_supported(void);
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
#endif /* __KFD_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index f516fd10e6ba..a6752bd0c871 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -46,6 +46,28 @@ enum kfd_preempt_type {
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
};
+struct kfd_cu_info {
+ uint32_t num_shader_engines;
+ uint32_t num_shader_arrays_per_engine;
+ uint32_t num_cu_per_sh;
+ uint32_t cu_active_number;
+ uint32_t cu_ao_mask;
+ uint32_t simd_per_cu;
+ uint32_t max_waves_per_simd;
+ uint32_t wave_front_size;
+ uint32_t max_scratch_slots_per_cu;
+ uint32_t lds_size;
+ uint32_t cu_bitmap[4][4];
+};
+
+/* For getting GPU local memory information from KGD */
+struct kfd_local_mem_info {
+ uint64_t local_mem_size_private;
+ uint64_t local_mem_size_public;
+ uint32_t vram_width;
+ uint32_t mem_clk_max;
+};
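/*
 * Editor's sketch of how a KGD implementation might populate this struct
 * (the real implementation lives in amdgpu_amdkfd.c; the example_device
 * type and its fields are illustrative assumptions):
 */
static void example_get_local_mem_info(struct kgd_dev *kgd,
				       struct kfd_local_mem_info *mem_info)
{
	struct example_device *dev = (struct example_device *)kgd;

	memset(mem_info, 0, sizeof(*mem_info));
	mem_info->local_mem_size_public = dev->visible_vram_size;
	mem_info->local_mem_size_private = dev->vram_size -
					   dev->visible_vram_size;
	mem_info->vram_width = dev->vram_width;
	mem_info->mem_clk_max = dev->max_mem_clk_mhz;
}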
+
enum kgd_memory_pool {
KGD_POOL_SYSTEM_CACHEABLE = 1,
KGD_POOL_SYSTEM_WRITECOMBINE = 2,
@@ -106,7 +128,7 @@ struct tile_config {
*
* @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
*
- * @get_vmem_size: Retrieves (physical) size of VRAM
+ * @get_local_mem_info: Retrieves information about GPU local memory
*
* @get_gpu_clock_counter: Retrieves GPU clock counter
*
@@ -131,6 +153,12 @@ struct tile_config {
* @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
* used only for no HWS mode.
*
+ * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
+ * The array is allocated with kmalloc and must be freed with kfree by the
+ * caller.
+ *
+ * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value
+ * pairs. The array is allocated with kmalloc and must be freed with kfree
+ * by the caller.
+ *
* @hqd_is_occupies: Checks if a hqd slot is occupied.
*
* @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
@@ -147,6 +175,10 @@ struct tile_config {
*
* @get_tile_config: Returns GPU-specific tiling mode information
*
+ * @get_cu_info: Retrieves information about the activated CUs
+ *
+ * @get_vram_usage: Returns current VRAM usage
+ *
* This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver.
*
@@ -158,7 +190,8 @@ struct kfd2kgd_calls {
void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
- uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
+ void (*get_local_mem_info)(struct kgd_dev *kgd,
+ struct kfd_local_mem_info *mem_info);
uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
@@ -184,7 +217,16 @@ struct kfd2kgd_calls {
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
- int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
+ int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+
+ int (*hqd_dump)(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+
+ int (*hqd_sdma_dump)(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
@@ -224,6 +266,10 @@ struct kfd2kgd_calls {
void (*set_scratch_backing_va)(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
+
+ void (*get_cu_info)(struct kgd_dev *kgd,
+ struct kfd_cu_info *cu_info);
+ uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
};
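/*
 * Editor's sketch of the @hqd_dump ownership contract documented above:
 * the callback kmalloc()s the register array and the caller must kfree()
 * it ("kfd" here stands in for any holder of the kfd2kgd interface):
 */
static void example_dump_hqd(struct kfd_dev *kfd, uint32_t pipe_id,
			     uint32_t queue_id)
{
	uint32_t (*dump)[2];
	uint32_t n_regs, i;

	if (kfd->kfd2kgd->hqd_dump(kfd->kgd, pipe_id, queue_id,
				   &dump, &n_regs))
		return;

	for (i = 0; i < n_regs; i++)
		pr_debug("reg %#x = %#x\n", dump[i][0], dump[i][1]);

	kfree(dump);	/* array was allocated by the callback */
}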
/**
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index 20234820194b..717fbae1d362 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -153,6 +153,8 @@ struct vi_sdma_mqd {
uint32_t reserved_125;
uint32_t reserved_126;
uint32_t reserved_127;
+ uint32_t sdma_engine_id;
+ uint32_t sdma_queue_id;
};
struct vi_mqd {
diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 2e065facdce7..e2adfbef7d6b 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -13,6 +13,7 @@
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_plane_helper.h>
+#include <drm/drm_atomic_helper.h>
#include "armada_crtc.h"
#include "armada_drm.h"
#include "armada_fb.h"
@@ -20,13 +21,6 @@
#include "armada_hw.h"
#include "armada_trace.h"
-struct armada_frame_work {
- struct armada_plane_work work;
- struct drm_pending_vblank_event *event;
- struct armada_regs regs[4];
- struct drm_framebuffer *old_fb;
-};
-
enum csc_mode {
CSC_AUTO = 0,
CSC_YUV_CCIR601 = 1,
@@ -168,16 +162,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc)
void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
int x, int y)
{
+ const struct drm_format_info *format = fb->format;
+ unsigned int num_planes = format->num_planes;
u32 addr = drm_fb_obj(fb)->dev_addr;
- int num_planes = fb->format->num_planes;
int i;
if (num_planes > 3)
num_planes = 3;
- for (i = 0; i < num_planes; i++)
+ addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] +
+ x * format->cpp[0];
+
+ y /= format->vsub;
+ x /= format->hsub;
+
+ for (i = 1; i < num_planes; i++)
addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] +
- x * fb->format->cpp[i];
+ x * format->cpp[i];
for (; i < 3; i++)
addrs[i] = 0;
}
@@ -209,6 +210,38 @@ static unsigned armada_drm_crtc_calc_fb(struct drm_framebuffer *fb,
return i;
}
+static void armada_drm_plane_work_call(struct armada_crtc *dcrtc,
+ struct armada_plane_work *work,
+ void (*fn)(struct armada_crtc *, struct armada_plane_work *))
+{
+ struct armada_plane *dplane = drm_to_armada_plane(work->plane);
+ struct drm_pending_vblank_event *event;
+ struct drm_framebuffer *fb;
+
+ if (fn)
+ fn(dcrtc, work);
+ drm_crtc_vblank_put(&dcrtc->crtc);
+
+ event = work->event;
+ fb = work->old_fb;
+ if (event || fb) {
+ struct drm_device *dev = dcrtc->crtc.dev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ if (event)
+ drm_crtc_send_vblank_event(&dcrtc->crtc, event);
+ if (fb)
+ __armada_drm_queue_unref_work(dev, fb);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+
+ if (work->need_kfree)
+ kfree(work);
+
+ wake_up(&dplane->frame_wait);
+}
+
static void armada_drm_plane_work_run(struct armada_crtc *dcrtc,
struct drm_plane *plane)
{
@@ -216,24 +249,19 @@ static void armada_drm_plane_work_run(struct armada_crtc *dcrtc,
struct armada_plane_work *work = xchg(&dplane->work, NULL);
/* Handle any pending frame work. */
- if (work) {
- work->fn(dcrtc, dplane, work);
- drm_crtc_vblank_put(&dcrtc->crtc);
- }
-
- wake_up(&dplane->frame_wait);
+ if (work)
+ armada_drm_plane_work_call(dcrtc, work, work->fn);
}
int armada_drm_plane_work_queue(struct armada_crtc *dcrtc,
- struct armada_plane *plane, struct armada_plane_work *work)
+ struct armada_plane_work *work)
{
+ struct armada_plane *plane = drm_to_armada_plane(work->plane);
int ret;
ret = drm_crtc_vblank_get(&dcrtc->crtc);
- if (ret) {
- DRM_ERROR("failed to acquire vblank counter\n");
+ if (ret)
return ret;
- }
ret = cmpxchg(&plane->work, NULL, work) ? -EBUSY : 0;
if (ret)
@@ -247,51 +275,60 @@ int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout)
return wait_event_timeout(plane->frame_wait, !plane->work, timeout);
}
-struct armada_plane_work *armada_drm_plane_work_cancel(
- struct armada_crtc *dcrtc, struct armada_plane *plane)
+void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc,
+ struct armada_plane *dplane)
{
- struct armada_plane_work *work = xchg(&plane->work, NULL);
+ struct armada_plane_work *work = xchg(&dplane->work, NULL);
if (work)
- drm_crtc_vblank_put(&dcrtc->crtc);
-
- return work;
+ armada_drm_plane_work_call(dcrtc, work, work->cancel);
}
-static int armada_drm_crtc_queue_frame_work(struct armada_crtc *dcrtc,
- struct armada_frame_work *work)
+static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc,
+ struct armada_plane_work *work)
{
- struct armada_plane *plane = drm_to_armada_plane(dcrtc->crtc.primary);
+ unsigned long flags;
- return armada_drm_plane_work_queue(dcrtc, plane, &work->work);
+ spin_lock_irqsave(&dcrtc->irq_lock, flags);
+ armada_drm_crtc_update_regs(dcrtc, work->regs);
+ spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
}
-static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc,
- struct armada_plane *plane, struct armada_plane_work *work)
+static void armada_drm_crtc_complete_disable_work(struct armada_crtc *dcrtc,
+ struct armada_plane_work *work)
{
- struct armada_frame_work *fwork = container_of(work, struct armada_frame_work, work);
- struct drm_device *dev = dcrtc->crtc.dev;
unsigned long flags;
+ if (dcrtc->plane == work->plane)
+ dcrtc->plane = NULL;
+
spin_lock_irqsave(&dcrtc->irq_lock, flags);
- armada_drm_crtc_update_regs(dcrtc, fwork->regs);
+ armada_drm_crtc_update_regs(dcrtc, work->regs);
spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
+}
- if (fwork->event) {
- spin_lock_irqsave(&dev->event_lock, flags);
- drm_crtc_send_vblank_event(&dcrtc->crtc, fwork->event);
- spin_unlock_irqrestore(&dev->event_lock, flags);
- }
+static struct armada_plane_work *
+armada_drm_crtc_alloc_plane_work(struct drm_plane *plane)
+{
+ struct armada_plane_work *work;
+ int i = 0;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return NULL;
- /* Finally, queue the process-half of the cleanup. */
- __armada_drm_queue_unref_work(dcrtc->crtc.dev, fwork->old_fb);
- kfree(fwork);
+ work->plane = plane;
+ work->fn = armada_drm_crtc_complete_frame_work;
+ work->need_kfree = true;
+ armada_reg_queue_end(work->regs, i);
+
+ return work;
}
static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc,
struct drm_framebuffer *fb, bool force)
{
- struct armada_frame_work *work;
+ struct armada_plane_work *work;
if (!fb)
return;
@@ -302,15 +339,11 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc,
return;
}
- work = kmalloc(sizeof(*work), GFP_KERNEL);
+ work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary);
if (work) {
- int i = 0;
- work->work.fn = armada_drm_crtc_complete_frame_work;
- work->event = NULL;
work->old_fb = fb;
- armada_reg_queue_end(work->regs, i);
- if (armada_drm_crtc_queue_frame_work(dcrtc, work) == 0)
+ if (armada_drm_plane_work_queue(dcrtc, work) == 0)
return;
kfree(work);
@@ -373,8 +406,11 @@ static void armada_drm_crtc_prepare(struct drm_crtc *crtc)
* the new mode parameters.
*/
plane = dcrtc->plane;
- if (plane)
+ if (plane) {
drm_plane_force_disable(plane);
+ WARN_ON(!armada_drm_plane_work_wait(drm_to_armada_plane(plane),
+ HZ));
+ }
}
/* The mode_config.mutex will be held for this call */
@@ -440,11 +476,11 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat)
if (stat & VSYNC_IRQ)
drm_crtc_handle_vblank(&dcrtc->crtc);
- spin_lock(&dcrtc->irq_lock);
ovl_plane = dcrtc->plane;
if (ovl_plane)
armada_drm_plane_work_run(dcrtc, ovl_plane);
+ spin_lock(&dcrtc->irq_lock);
if (stat & GRA_FRAME_IRQ && dcrtc->interlaced) {
int i = stat & GRA_FRAME_IRQ0 ? 0 : 1;
uint32_t val;
@@ -536,18 +572,14 @@ static uint32_t armada_drm_crtc_calculate_csc(struct armada_crtc *dcrtc)
return val;
}
-static void armada_drm_primary_set(struct drm_crtc *crtc,
- struct drm_plane *plane, int x, int y)
+static void armada_drm_gra_plane_regs(struct armada_regs *regs,
+ struct drm_framebuffer *fb, struct armada_plane_state *state,
+ int x, int y, bool interlaced)
{
- struct armada_plane_state *state = &drm_to_armada_plane(plane)->state;
- struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
- struct armada_regs regs[8];
- bool interlaced = dcrtc->interlaced;
- unsigned i;
+ unsigned int i;
u32 ctrl0;
- i = armada_drm_crtc_calc_fb(plane->fb, x, y, regs, interlaced);
-
+ i = armada_drm_crtc_calc_fb(fb, x, y, regs, interlaced);
armada_reg_queue_set(regs, i, state->dst_yx, LCD_SPU_GRA_OVSA_HPXL_VLN);
armada_reg_queue_set(regs, i, state->src_hw, LCD_SPU_GRA_HPXL_VLN);
armada_reg_queue_set(regs, i, state->dst_hw, LCD_SPU_GZM_HPXL_VLN);
@@ -559,9 +591,21 @@ static void armada_drm_primary_set(struct drm_crtc *crtc,
armada_reg_queue_mod(regs, i, ctrl0, CFG_GRAFORMAT |
CFG_GRA_MOD(CFG_SWAPRB | CFG_SWAPUV |
CFG_SWAPYU | CFG_YUV2RGB) |
- CFG_PALETTE_ENA | CFG_GRA_FTOGGLE,
+ CFG_PALETTE_ENA | CFG_GRA_FTOGGLE |
+ CFG_GRA_HSMOOTH | CFG_GRA_ENA,
LCD_SPU_DMA_CTRL0);
armada_reg_queue_end(regs, i);
+}
+
+static void armada_drm_primary_set(struct drm_crtc *crtc,
+ struct drm_plane *plane, int x, int y)
+{
+ struct armada_plane_state *state = &drm_to_armada_plane(plane)->state;
+ struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+ struct armada_regs regs[8];
+ bool interlaced = dcrtc->interlaced;
+
+ armada_drm_gra_plane_regs(regs, plane->fb, state, x, y, interlaced);
armada_drm_crtc_update_regs(dcrtc, regs);
}
@@ -581,7 +625,7 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
interlaced = !!(adj->flags & DRM_MODE_FLAG_INTERLACE);
- val = CFG_GRA_ENA | CFG_GRA_HSMOOTH;
+ val = CFG_GRA_ENA;
val |= CFG_GRA_FMT(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->fmt);
val |= CFG_GRA_MOD(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->mod);
@@ -633,8 +677,6 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
/* Now compute the divider for real */
dcrtc->variant->compute_clock(dcrtc, adj, &sclk);
- /* Ensure graphic fifo is enabled */
- armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
armada_reg_queue_set(regs, i, sclk, LCD_CFG_SCLK_DIV);
if (interlaced ^ dcrtc->interlaced) {
@@ -647,6 +689,9 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc,
spin_lock_irqsave(&dcrtc->irq_lock, flags);
+ /* Ensure graphic fifo is enabled */
+ armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
+
/* Even interlaced/progressive frame */
dcrtc->v[1].spu_v_h_total = adj->crtc_vtotal << 16 |
adj->crtc_htotal;
@@ -729,47 +774,13 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
return 0;
}
-void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc,
- struct drm_plane *plane)
-{
- u32 sram_para1, dma_ctrl0_mask;
-
- /*
- * Drop our reference on any framebuffer attached to this plane.
- * We don't need to NULL this out as drm_plane_force_disable(),
- * and __setplane_internal() will do so for an overlay plane, and
- * __drm_helper_disable_unused_functions() will do so for the
- * primary plane.
- */
- if (plane->fb)
- drm_framebuffer_put(plane->fb);
-
- /* Power down the Y/U/V FIFOs */
- sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
-
- /* Power down most RAMs and FIFOs if this is the primary plane */
- if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
- sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
- CFG_PDWN32x32 | CFG_PDWN64x66;
- dma_ctrl0_mask = CFG_GRA_ENA;
- } else {
- dma_ctrl0_mask = CFG_DMA_ENA;
- }
-
- spin_lock_irq(&dcrtc->irq_lock);
- armada_updatel(0, dma_ctrl0_mask, dcrtc->base + LCD_SPU_DMA_CTRL0);
- spin_unlock_irq(&dcrtc->irq_lock);
-
- armada_updatel(sram_para1, 0, dcrtc->base + LCD_SPU_SRAM_PARA1);
-}
-
/* The mode_config.mutex will be held for this call */
static void armada_drm_crtc_disable(struct drm_crtc *crtc)
{
- struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
-
armada_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
- armada_drm_crtc_plane_disable(dcrtc, crtc->primary);
+
+ /* Disable our primary plane when we disable the CRTC. */
+ crtc->primary->funcs->disable_plane(crtc->primary, NULL);
}
static const struct drm_crtc_helper_funcs armada_crtc_helper_funcs = {
@@ -879,9 +890,11 @@ static int armada_drm_crtc_cursor_update(struct armada_crtc *dcrtc, bool reload)
return 0;
}
+ spin_lock_irq(&dcrtc->irq_lock);
para1 = readl_relaxed(dcrtc->base + LCD_SPU_SRAM_PARA1);
armada_updatel(CFG_CSB_256x32, CFG_CSB_256x32 | CFG_PDWN256x32,
dcrtc->base + LCD_SPU_SRAM_PARA1);
+ spin_unlock_irq(&dcrtc->irq_lock);
/*
* Initialize the transparency if the SRAM was powered down.
@@ -1021,7 +1034,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
struct drm_modeset_acquire_ctx *ctx)
{
struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
- struct armada_frame_work *work;
+ struct armada_plane_work *work;
unsigned i;
int ret;
@@ -1029,11 +1042,10 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
if (fb->format != crtc->primary->fb->format)
return -EINVAL;
- work = kmalloc(sizeof(*work), GFP_KERNEL);
+ work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary);
if (!work)
return -ENOMEM;
- work->work.fn = armada_drm_crtc_complete_frame_work;
work->event = event;
work->old_fb = dcrtc->crtc.primary->fb;
@@ -1047,7 +1059,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc,
*/
drm_framebuffer_get(fb);
- ret = armada_drm_crtc_queue_frame_work(dcrtc, work);
+ ret = armada_drm_plane_work_queue(dcrtc, work);
if (ret) {
/* Undo our reference above */
drm_framebuffer_put(fb);
@@ -1127,14 +1139,195 @@ static const struct drm_crtc_funcs armada_crtc_funcs = {
.disable_vblank = armada_drm_crtc_disable_vblank,
};
+static void armada_drm_primary_update_state(struct drm_plane_state *state,
+ struct armada_regs *regs)
+{
+ struct armada_plane *dplane = drm_to_armada_plane(state->plane);
+ struct armada_crtc *dcrtc = drm_to_armada_crtc(state->crtc);
+ struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb);
+ bool was_disabled;
+ unsigned int idx = 0;
+ u32 val;
+
+ val = CFG_GRA_FMT(dfb->fmt) | CFG_GRA_MOD(dfb->mod);
+ if (dfb->fmt > CFG_420)
+ val |= CFG_PALETTE_ENA;
+ if (state->visible)
+ val |= CFG_GRA_ENA;
+ if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst))
+ val |= CFG_GRA_HSMOOTH;
+
+ was_disabled = !(dplane->state.ctrl0 & CFG_GRA_ENA);
+ if (was_disabled)
+ armada_reg_queue_mod(regs, idx,
+ 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1);
+
+ dplane->state.ctrl0 = val;
+ dplane->state.src_hw = (drm_rect_height(&state->src) & 0xffff0000) |
+ drm_rect_width(&state->src) >> 16;
+ dplane->state.dst_hw = drm_rect_height(&state->dst) << 16 |
+ drm_rect_width(&state->dst);
+ dplane->state.dst_yx = state->dst.y1 << 16 | state->dst.x1;
+
+ armada_drm_gra_plane_regs(regs + idx, &dfb->fb, &dplane->state,
+ state->src.x1 >> 16, state->src.y1 >> 16,
+ dcrtc->interlaced);
+
+ dplane->state.vsync_update = !was_disabled;
+ dplane->state.changed = true;
+}
+
+static int armada_drm_primary_update(struct drm_plane *plane,
+ struct drm_crtc *crtc, struct drm_framebuffer *fb,
+ int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h,
+ uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct armada_plane *dplane = drm_to_armada_plane(plane);
+ struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+ struct armada_plane_work *work;
+ struct drm_plane_state state = {
+ .plane = plane,
+ .crtc = crtc,
+ .fb = fb,
+ .src_x = src_x,
+ .src_y = src_y,
+ .src_w = src_w,
+ .src_h = src_h,
+ .crtc_x = crtc_x,
+ .crtc_y = crtc_y,
+ .crtc_w = crtc_w,
+ .crtc_h = crtc_h,
+ .rotation = DRM_MODE_ROTATE_0,
+ };
+ const struct drm_rect clip = {
+ .x2 = crtc->mode.hdisplay,
+ .y2 = crtc->mode.vdisplay,
+ };
+ int ret;
+
+ ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0,
+ INT_MAX, true, false);
+ if (ret)
+ return ret;
+
+ work = &dplane->works[dplane->next_work];
+ work->fn = armada_drm_crtc_complete_frame_work;
+
+ if (plane->fb != fb) {
+ /*
+ * Take a reference on the new framebuffer - we want to
+ * hold on to it while the hardware is displaying it.
+ */
+ drm_framebuffer_reference(fb);
+
+ work->old_fb = plane->fb;
+ } else {
+ work->old_fb = NULL;
+ }
+
+ armada_drm_primary_update_state(&state, work->regs);
+
+ if (!dplane->state.changed)
+ return 0;
+
+ /* Wait for pending work to complete */
+ if (armada_drm_plane_work_wait(dplane, HZ / 10) == 0)
+ armada_drm_plane_work_cancel(dcrtc, dplane);
+
+ if (!dplane->state.vsync_update) {
+ work->fn(dcrtc, work);
+ if (work->old_fb)
+ drm_framebuffer_unreference(work->old_fb);
+ return 0;
+ }
+
+ /* Queue it for update on the next interrupt if we are enabled */
+ ret = armada_drm_plane_work_queue(dcrtc, work);
+ if (ret) {
+ work->fn(dcrtc, work);
+ if (work->old_fb)
+ drm_framebuffer_unreference(work->old_fb);
+ }
+
+ dplane->next_work = !dplane->next_work;
+
+ return 0;
+}
+
+int armada_drm_plane_disable(struct drm_plane *plane,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct armada_plane *dplane = drm_to_armada_plane(plane);
+ struct armada_crtc *dcrtc;
+ struct armada_plane_work *work;
+ unsigned int idx = 0;
+ u32 sram_para1, enable_mask;
+
+ if (!plane->crtc)
+ return 0;
+
+ /*
+ * Arrange to power down most RAMs and FIFOs if this is the primary
+ * plane, otherwise just the YUV FIFOs for the overlay plane.
+ */
+ if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+ sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
+ CFG_PDWN32x32 | CFG_PDWN64x66;
+ enable_mask = CFG_GRA_ENA;
+ } else {
+ sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
+ enable_mask = CFG_DMA_ENA;
+ }
+
+ dplane->state.ctrl0 &= ~enable_mask;
+
+ dcrtc = drm_to_armada_crtc(plane->crtc);
+
+ /*
+ * Try to disable the plane and drop our ref on the framebuffer
+ * at the next frame update. If we fail for any reason, disable
+ * the plane immediately.
+ */
+ work = &dplane->works[dplane->next_work];
+ work->fn = armada_drm_crtc_complete_disable_work;
+ work->cancel = armada_drm_crtc_complete_disable_work;
+ work->old_fb = plane->fb;
+
+ armada_reg_queue_mod(work->regs, idx,
+ 0, enable_mask, LCD_SPU_DMA_CTRL0);
+ armada_reg_queue_mod(work->regs, idx,
+ sram_para1, 0, LCD_SPU_SRAM_PARA1);
+ armada_reg_queue_end(work->regs, idx);
+
+ /* Wait for any preceding work to complete, but don't wedge */
+ if (WARN_ON(!armada_drm_plane_work_wait(dplane, HZ)))
+ armada_drm_plane_work_cancel(dcrtc, dplane);
+
+ if (armada_drm_plane_work_queue(dcrtc, work)) {
+ work->fn(dcrtc, work);
+ if (work->old_fb)
+ drm_framebuffer_unreference(work->old_fb);
+ }
+
+ dplane->next_work = !dplane->next_work;
+
+ return 0;
+}
+
static const struct drm_plane_funcs armada_primary_plane_funcs = {
- .update_plane = drm_primary_helper_update,
- .disable_plane = drm_primary_helper_disable,
+ .update_plane = armada_drm_primary_update,
+ .disable_plane = armada_drm_plane_disable,
.destroy = drm_primary_helper_destroy,
};
int armada_drm_plane_init(struct armada_plane *plane)
{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(plane->works); i++)
+ plane->works[i].plane = &plane->base;
+
init_waitqueue_head(&plane->frame_wait);
return 0;
@@ -1225,17 +1418,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc",
dcrtc);
- if (ret < 0) {
- kfree(dcrtc);
- return ret;
- }
+ if (ret < 0)
+ goto err_crtc;
if (dcrtc->variant->init) {
ret = dcrtc->variant->init(dcrtc, dev);
- if (ret) {
- kfree(dcrtc);
- return ret;
- }
+ if (ret)
+ goto err_crtc;
}
/* Ensure AXI pipeline is enabled */
@@ -1246,13 +1435,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
dcrtc->crtc.port = port;
primary = kzalloc(sizeof(*primary), GFP_KERNEL);
- if (!primary)
- return -ENOMEM;
+ if (!primary) {
+ ret = -ENOMEM;
+ goto err_crtc;
+ }
ret = armada_drm_plane_init(primary);
if (ret) {
kfree(primary);
- return ret;
+ goto err_crtc;
}
ret = drm_universal_plane_init(drm, &primary->base, 0,
@@ -1263,7 +1454,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
DRM_PLANE_TYPE_PRIMARY, NULL);
if (ret) {
kfree(primary);
- return ret;
+ goto err_crtc;
}
ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL,
@@ -1282,6 +1473,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
err_crtc_init:
primary->base.funcs->destroy(&primary->base);
+err_crtc:
+ kfree(dcrtc);
+
return ret;
}
diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h
index bab11f483575..445829b8877a 100644
--- a/drivers/gpu/drm/armada/armada_crtc.h
+++ b/drivers/gpu/drm/armada/armada_crtc.h
@@ -36,21 +36,31 @@ struct armada_plane;
struct armada_variant;
struct armada_plane_work {
- void (*fn)(struct armada_crtc *,
- struct armada_plane *,
- struct armada_plane_work *);
+ void (*fn)(struct armada_crtc *, struct armada_plane_work *);
+ void (*cancel)(struct armada_crtc *, struct armada_plane_work *);
+ bool need_kfree;
+ struct drm_plane *plane;
+ struct drm_framebuffer *old_fb;
+ struct drm_pending_vblank_event *event;
+ struct armada_regs regs[14];
};
struct armada_plane_state {
+ u16 src_x;
+ u16 src_y;
u32 src_hw;
u32 dst_hw;
u32 dst_yx;
u32 ctrl0;
+ bool changed;
+ bool vsync_update;
};
struct armada_plane {
struct drm_plane base;
wait_queue_head_t frame_wait;
+ bool next_work;
+ struct armada_plane_work works[2];
struct armada_plane_work *work;
struct armada_plane_state state;
};
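/*
 * Editor's note: the two pre-allocated works are used in a ping-pong
 * fashion, so an update can be prepared while the previous one is still
 * pending on the vblank, e.g.:
 *
 *	work = &dplane->works[dplane->next_work];
 *	...fill work->regs, work->fn, work->old_fb...
 *	armada_drm_plane_work_queue(dcrtc, work);
 *	dplane->next_work = !dplane->next_work;
 */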
@@ -58,10 +68,10 @@ struct armada_plane {
int armada_drm_plane_init(struct armada_plane *plane);
int armada_drm_plane_work_queue(struct armada_crtc *dcrtc,
- struct armada_plane *plane, struct armada_plane_work *work);
+ struct armada_plane_work *work);
int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout);
-struct armada_plane_work *armada_drm_plane_work_cancel(
- struct armada_crtc *dcrtc, struct armada_plane *plane);
+void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc,
+ struct armada_plane *plane);
void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
int x, int y);
@@ -104,8 +114,8 @@ struct armada_crtc {
void armada_drm_crtc_update_regs(struct armada_crtc *, struct armada_regs *);
-void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc,
- struct drm_plane *plane);
+int armada_drm_plane_disable(struct drm_plane *plane,
+ struct drm_modeset_acquire_ctx *ctx);
extern struct platform_driver armada_lcd_platform_driver;
diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c
index b411b608821a..77b55adaa2ac 100644
--- a/drivers/gpu/drm/armada/armada_overlay.c
+++ b/drivers/gpu/drm/armada/armada_overlay.c
@@ -7,7 +7,7 @@
* published by the Free Software Foundation.
*/
#include <drm/drmP.h>
-#include <drm/drm_plane_helper.h>
+#include <drm/drm_atomic_helper.h>
#include "armada_crtc.h"
#include "armada_drm.h"
#include "armada_fb.h"
@@ -32,11 +32,6 @@ struct armada_ovl_plane_properties {
struct armada_ovl_plane {
struct armada_plane base;
- struct drm_framebuffer *old_fb;
- struct {
- struct armada_plane_work work;
- struct armada_regs regs[13];
- } vbl;
struct armada_ovl_plane_properties prop;
};
#define drm_to_armada_ovl_plane(p) \
@@ -67,218 +62,204 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop,
spin_unlock_irq(&dcrtc->irq_lock);
}
-static void armada_ovl_retire_fb(struct armada_ovl_plane *dplane,
- struct drm_framebuffer *fb)
-{
- struct drm_framebuffer *old_fb;
-
- old_fb = xchg(&dplane->old_fb, fb);
-
- if (old_fb)
- armada_drm_queue_unref_work(dplane->base.base.dev, old_fb);
-}
-
/* === Plane support === */
static void armada_ovl_plane_work(struct armada_crtc *dcrtc,
- struct armada_plane *plane, struct armada_plane_work *work)
+ struct armada_plane_work *work)
{
- struct armada_ovl_plane *dplane = container_of(plane, struct armada_ovl_plane, base);
+ unsigned long flags;
- trace_armada_ovl_plane_work(&dcrtc->crtc, &plane->base);
+ trace_armada_ovl_plane_work(&dcrtc->crtc, work->plane);
- armada_drm_crtc_update_regs(dcrtc, dplane->vbl.regs);
- armada_ovl_retire_fb(dplane, NULL);
+ spin_lock_irqsave(&dcrtc->irq_lock, flags);
+ armada_drm_crtc_update_regs(dcrtc, work->regs);
+ spin_unlock_irqrestore(&dcrtc->irq_lock, flags);
}
-static int
-armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h,
- uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
- struct drm_modeset_acquire_ctx *ctx)
+static void armada_ovl_plane_update_state(struct drm_plane_state *state,
+ struct armada_regs *regs)
{
- struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
- struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
- struct drm_rect src = {
- .x1 = src_x,
- .y1 = src_y,
- .x2 = src_x + src_w,
- .y2 = src_y + src_h,
- };
- struct drm_rect dest = {
- .x1 = crtc_x,
- .y1 = crtc_y,
- .x2 = crtc_x + crtc_w,
- .y2 = crtc_y + crtc_h,
- };
- const struct drm_rect clip = {
- .x2 = crtc->mode.hdisplay,
- .y2 = crtc->mode.vdisplay,
- };
- uint32_t val, ctrl0;
- unsigned idx = 0;
- bool visible;
- int ret;
-
- trace_armada_ovl_plane_update(plane, crtc, fb,
- crtc_x, crtc_y, crtc_w, crtc_h,
- src_x, src_y, src_w, src_h);
-
- ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip,
- DRM_MODE_ROTATE_0,
- 0, INT_MAX, true, false, &visible);
- if (ret)
- return ret;
-
- ctrl0 = CFG_DMA_FMT(drm_fb_to_armada_fb(fb)->fmt) |
- CFG_DMA_MOD(drm_fb_to_armada_fb(fb)->mod) |
- CFG_CBSH_ENA | CFG_DMA_HSMOOTH | CFG_DMA_ENA;
-
- /* Does the position/size result in nothing to display? */
- if (!visible)
- ctrl0 &= ~CFG_DMA_ENA;
-
- if (!dcrtc->plane) {
- dcrtc->plane = plane;
- armada_ovl_update_attr(&dplane->prop, dcrtc);
- }
-
- /* FIXME: overlay on an interlaced display */
- /* Just updating the position/size? */
- if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) {
- val = (drm_rect_height(&src) & 0xffff0000) |
- drm_rect_width(&src) >> 16;
- dplane->base.state.src_hw = val;
- writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_HPXL_VLN);
-
- val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest);
- dplane->base.state.dst_hw = val;
- writel_relaxed(val, dcrtc->base + LCD_SPU_DZM_HPXL_VLN);
-
- val = dest.y1 << 16 | dest.x1;
- dplane->base.state.dst_yx = val;
- writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_OVSA_HPXL_VLN);
-
- return 0;
- } else if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) {
+ struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(state->plane);
+ struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb);
+ const struct drm_format_info *format;
+ unsigned int idx = 0;
+ bool fb_changed;
+ u32 val, ctrl0;
+ u16 src_x, src_y;
+
+ ctrl0 = CFG_DMA_FMT(dfb->fmt) | CFG_DMA_MOD(dfb->mod) | CFG_CBSH_ENA;
+ if (state->visible)
+ ctrl0 |= CFG_DMA_ENA;
+ if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst))
+ ctrl0 |= CFG_DMA_HSMOOTH;
+
+ /*
+ * Shifting a YUV packed format image by one pixel causes the U/V
+ * planes to swap. Compensate for it by also toggling the UV swap.
+ */
+ format = dfb->fb.format;
+ if (format->num_planes == 1 && state->src.x1 >> 16 & (format->hsub - 1))
+ ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+
+ if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) {
/* Power up the Y/U/V FIFOs on ENA 0->1 transitions */
- armada_updatel(0, CFG_PDWN16x66 | CFG_PDWN32x66,
- dcrtc->base + LCD_SPU_SRAM_PARA1);
+ armada_reg_queue_mod(regs, idx,
+ 0, CFG_PDWN16x66 | CFG_PDWN32x66,
+ LCD_SPU_SRAM_PARA1);
}
- if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
- armada_drm_plane_work_cancel(dcrtc, &dplane->base);
-
- if (plane->fb != fb) {
- u32 addrs[3], pixel_format;
- int num_planes, hsub;
-
- /*
- * Take a reference on the new framebuffer - we want to
- * hold on to it while the hardware is displaying it.
- */
- drm_framebuffer_get(fb);
-
- if (plane->fb)
- armada_ovl_retire_fb(dplane, plane->fb);
+ fb_changed = dplane->base.base.fb != &dfb->fb ||
+ dplane->base.state.src_x != state->src.x1 >> 16 ||
+ dplane->base.state.src_y != state->src.y1 >> 16;
- src_y = src.y1 >> 16;
- src_x = src.x1 >> 16;
+ dplane->base.state.vsync_update = fb_changed;
- armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y);
+ /* FIXME: overlay on an interlaced display */
+ if (fb_changed) {
+ u32 addrs[3];
- pixel_format = fb->format->format;
- hsub = drm_format_horz_chroma_subsampling(pixel_format);
- num_planes = fb->format->num_planes;
+ dplane->base.state.src_y = src_y = state->src.y1 >> 16;
+ dplane->base.state.src_x = src_x = state->src.x1 >> 16;
- /*
- * Annoyingly, shifting a YUYV-format image by one pixel
- * causes the U/V planes to toggle. Toggle the UV swap.
- * (Unfortunately, this causes momentary colour flickering.)
- */
- if (src_x & (hsub - 1) && num_planes == 1)
- ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+ armada_drm_plane_calc_addrs(addrs, &dfb->fb, src_x, src_y);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
+ armada_reg_queue_set(regs, idx, addrs[0],
LCD_SPU_DMA_START_ADDR_Y0);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
+ armada_reg_queue_set(regs, idx, addrs[1],
LCD_SPU_DMA_START_ADDR_U0);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2],
+ armada_reg_queue_set(regs, idx, addrs[2],
LCD_SPU_DMA_START_ADDR_V0);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
+ armada_reg_queue_set(regs, idx, addrs[0],
LCD_SPU_DMA_START_ADDR_Y1);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
+ armada_reg_queue_set(regs, idx, addrs[1],
LCD_SPU_DMA_START_ADDR_U1);
- armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2],
+ armada_reg_queue_set(regs, idx, addrs[2],
LCD_SPU_DMA_START_ADDR_V1);
- val = fb->pitches[0] << 16 | fb->pitches[0];
- armada_reg_queue_set(dplane->vbl.regs, idx, val,
+ val = dfb->fb.pitches[0] << 16 | dfb->fb.pitches[0];
+ armada_reg_queue_set(regs, idx, val,
LCD_SPU_DMA_PITCH_YC);
- val = fb->pitches[1] << 16 | fb->pitches[2];
- armada_reg_queue_set(dplane->vbl.regs, idx, val,
+ val = dfb->fb.pitches[1] << 16 | dfb->fb.pitches[2];
+ armada_reg_queue_set(regs, idx, val,
LCD_SPU_DMA_PITCH_UV);
}
- val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16;
+ val = (drm_rect_height(&state->src) & 0xffff0000) |
+ drm_rect_width(&state->src) >> 16;
if (dplane->base.state.src_hw != val) {
dplane->base.state.src_hw = val;
- armada_reg_queue_set(dplane->vbl.regs, idx, val,
+ armada_reg_queue_set(regs, idx, val,
LCD_SPU_DMA_HPXL_VLN);
}
- val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest);
+ val = drm_rect_height(&state->dst) << 16 | drm_rect_width(&state->dst);
if (dplane->base.state.dst_hw != val) {
dplane->base.state.dst_hw = val;
- armada_reg_queue_set(dplane->vbl.regs, idx, val,
+ armada_reg_queue_set(regs, idx, val,
LCD_SPU_DZM_HPXL_VLN);
}
- val = dest.y1 << 16 | dest.x1;
+ val = state->dst.y1 << 16 | state->dst.x1;
if (dplane->base.state.dst_yx != val) {
dplane->base.state.dst_yx = val;
- armada_reg_queue_set(dplane->vbl.regs, idx, val,
+ armada_reg_queue_set(regs, idx, val,
LCD_SPU_DMA_OVSA_HPXL_VLN);
}
if (dplane->base.state.ctrl0 != ctrl0) {
dplane->base.state.ctrl0 = ctrl0;
- armada_reg_queue_mod(dplane->vbl.regs, idx, ctrl0,
+ armada_reg_queue_mod(regs, idx, ctrl0,
CFG_CBSH_ENA | CFG_DMAFORMAT | CFG_DMA_FTOGGLE |
CFG_DMA_HSMOOTH | CFG_DMA_TSTMODE |
CFG_DMA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU |
CFG_YUV2RGB) | CFG_DMA_ENA,
LCD_SPU_DMA_CTRL0);
+ dplane->base.state.vsync_update = true;
}
- if (idx) {
- armada_reg_queue_end(dplane->vbl.regs, idx);
- armada_drm_plane_work_queue(dcrtc, &dplane->base,
- &dplane->vbl.work);
- }
- return 0;
+
+ dplane->base.state.changed = idx != 0;
+
+ armada_reg_queue_end(regs, idx);
}
-static int armada_ovl_plane_disable(struct drm_plane *plane,
- struct drm_modeset_acquire_ctx *ctx)
+static int
+armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h,
+ uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
- struct drm_framebuffer *fb;
- struct armada_crtc *dcrtc;
+ struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+ struct armada_plane_work *work;
+ struct drm_plane_state state = {
+ .plane = plane,
+ .crtc = crtc,
+ .fb = fb,
+ .src_x = src_x,
+ .src_y = src_y,
+ .src_w = src_w,
+ .src_h = src_h,
+ .crtc_x = crtc_x,
+ .crtc_y = crtc_y,
+ .crtc_w = crtc_w,
+ .crtc_h = crtc_h,
+ .rotation = DRM_MODE_ROTATE_0,
+ };
+ const struct drm_rect clip = {
+ .x2 = crtc->mode.hdisplay,
+ .y2 = crtc->mode.vdisplay,
+ };
+ int ret;
- if (!dplane->base.base.crtc)
+ trace_armada_ovl_plane_update(plane, crtc, fb,
+ crtc_x, crtc_y, crtc_w, crtc_h,
+ src_x, src_y, src_w, src_h);
+
+ ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0,
+ INT_MAX, true, false);
+ if (ret)
+ return ret;
+
+ work = &dplane->base.works[dplane->base.next_work];
+
+ if (plane->fb != fb) {
+ /*
+ * Take a reference on the new framebuffer - we want to
+ * hold on to it while the hardware is displaying it.
+ */
+ drm_framebuffer_reference(fb);
+
+ work->old_fb = plane->fb;
+ } else {
+ work->old_fb = NULL;
+ }
+
+ armada_ovl_plane_update_state(&state, work->regs);
+
+ if (!dplane->base.state.changed)
return 0;
- dcrtc = drm_to_armada_crtc(dplane->base.base.crtc);
+ /* Wait for pending work to complete */
+ if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
+ armada_drm_plane_work_cancel(dcrtc, &dplane->base);
+
+ /* Just updating the position/size? */
+ if (!dplane->base.state.vsync_update) {
+ armada_ovl_plane_work(dcrtc, work);
+ return 0;
+ }
- armada_drm_plane_work_cancel(dcrtc, &dplane->base);
- armada_drm_crtc_plane_disable(dcrtc, plane);
+ if (!dcrtc->plane) {
+ dcrtc->plane = plane;
+ armada_ovl_update_attr(&dplane->prop, dcrtc);
+ }
- dcrtc->plane = NULL;
- dplane->base.state.ctrl0 = 0;
+ /* Queue it for update on the next interrupt if we are enabled */
+ ret = armada_drm_plane_work_queue(dcrtc, work);
+ if (ret)
+ DRM_ERROR("failed to queue plane work: %d\n", ret);
- fb = xchg(&dplane->old_fb, NULL);
- if (fb)
- drm_framebuffer_put(fb);
+ dplane->base.next_work = !dplane->base.next_work;
return 0;
}
@@ -362,7 +343,7 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane,
static const struct drm_plane_funcs armada_ovl_plane_funcs = {
.update_plane = armada_ovl_plane_update,
- .disable_plane = armada_ovl_plane_disable,
+ .disable_plane = armada_drm_plane_disable,
.destroy = armada_ovl_plane_destroy,
.set_property = armada_ovl_plane_set_property,
};
@@ -454,7 +435,8 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs)
return ret;
}
- dplane->vbl.work.fn = armada_ovl_plane_work;
+ dplane->base.works[0].fn = armada_ovl_plane_work;
+ dplane->base.works[1].fn = armada_ovl_plane_work;
ret = drm_universal_plane_init(dev, &dplane->base.base, crtcs,
&armada_ovl_plane_funcs,
diff --git a/drivers/gpu/drm/armada/armada_trace.h b/drivers/gpu/drm/armada/armada_trace.h
index 8dbfea7a00fe..f03a56bda596 100644
--- a/drivers/gpu/drm/armada/armada_trace.h
+++ b/drivers/gpu/drm/armada/armada_trace.h
@@ -34,14 +34,34 @@ TRACE_EVENT(armada_ovl_plane_update,
__field(struct drm_plane *, plane)
__field(struct drm_crtc *, crtc)
__field(struct drm_framebuffer *, fb)
+ __field(int, crtc_x)
+ __field(int, crtc_y)
+ __field(unsigned int, crtc_w)
+ __field(unsigned int, crtc_h)
+ __field(u32, src_x)
+ __field(u32, src_y)
+ __field(u32, src_w)
+ __field(u32, src_h)
),
TP_fast_assign(
__entry->plane = plane;
__entry->crtc = crtc;
__entry->fb = fb;
+ __entry->crtc_x = crtc_x;
+ __entry->crtc_y = crtc_y;
+ __entry->crtc_w = crtc_w;
+ __entry->crtc_h = crtc_h;
+ __entry->src_x = src_x;
+ __entry->src_y = src_y;
+ __entry->src_w = src_w;
+ __entry->src_h = src_h;
),
- TP_printk("plane %p crtc %p fb %p",
- __entry->plane, __entry->crtc, __entry->fb)
+ TP_printk("plane %p crtc %p fb %p crtc @ (%d,%d, %ux%u) src @ (%u,%u, %ux%u)",
+ __entry->plane, __entry->crtc, __entry->fb,
+ __entry->crtc_x, __entry->crtc_y,
+ __entry->crtc_w, __entry->crtc_h,
+ __entry->src_x >> 16, __entry->src_y >> 16,
+ __entry->src_w >> 16, __entry->src_h >> 16)
);
TRACE_EVENT(armada_ovl_plane_work,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 37445d50816a..b76d49218cf1 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -50,7 +50,8 @@ EXPORT_SYMBOL(__drm_crtc_commit_free);
* @state: atomic state
*
* Free all the memory allocated by drm_atomic_state_init.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
*/
void drm_atomic_state_default_release(struct drm_atomic_state *state)
{
@@ -67,7 +68,8 @@ EXPORT_SYMBOL(drm_atomic_state_default_release);
* @state: atomic state
*
* Default implementation for filling in a new atomic state.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
*/
int
drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
@@ -132,7 +134,8 @@ EXPORT_SYMBOL(drm_atomic_state_alloc);
* @state: atomic state
*
* Default implementation for clearing atomic state.
- * This is useful for drivers that subclass the atomic state.
+ * This should only be used by drivers which are still subclassing
+ * &drm_atomic_state and haven't switched to &drm_private_state yet.
*/
void drm_atomic_state_default_clear(struct drm_atomic_state *state)
{
@@ -947,6 +950,42 @@ static void drm_atomic_plane_print_state(struct drm_printer *p,
}
/**
+ * DOC: handling driver private state
+ *
+ * Very often the DRM objects exposed to userspace in the atomic modeset API
+ * (&drm_connector, &drm_crtc and &drm_plane) do not map neatly to the
+ * underlying hardware. Especially for any kind of shared resources (e.g.
+ * shared clocks, scaler units, bandwidth and FIFO limits shared among a
+ * group of planes or CRTCs, and so on) it makes sense to model these as
+ * independent objects. Drivers then need to do similar state tracking and
+ * commit ordering for such private (since not exposed to userspace) objects
+ * as the atomic core and helpers already provide for connectors, planes and
+ * CRTCs.
+ *
+ * To make this easier on drivers the atomic core provides some support to track
+ * driver private state objects using struct &drm_private_obj, with the
+ * associated state struct &drm_private_state.
+ *
+ * Similar to userspace-exposed objects, private state structures can be
+ * acquired by calling drm_atomic_get_private_obj_state(). Since this function
+ * does not take care of locking, drivers should wrap it for each type of
+ * private state object they have with the required call to drm_modeset_lock()
+ * for the corresponding &drm_modeset_lock.
+ *
+ * All private state structures contained in a &drm_atomic_state update can be
+ * iterated using for_each_oldnew_private_obj_in_state(),
+ * for_each_new_private_obj_in_state() and for_each_old_private_obj_in_state().
+ * Drivers are recommended to wrap these for each type of driver private state
+ * object they have, filtering on &drm_private_obj.funcs using for_each_if(), at
+ * least if they want to iterate over all objects of a given type.
+ *
+ * An earlier way to handle driver private state was by subclassing struct
+ * &drm_atomic_state. But since that encourages non-standard ways to
+ * implement the check/commit split atomic requires (by using e.g. "check
+ * and rollback or commit" instead of "duplicate state, check, then either
+ * commit or release the duplicated state") it is deprecated in favour of
+ * using &drm_private_state.
+ */
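/*
 * Editor's sketch of the wrapping recommended above, for a hypothetical
 * driver-private object "foo_bus" guarding a shared resource (all foo_*
 * names are assumptions, not part of this patch):
 */
struct foo_bus {
	struct drm_private_obj obj;
	struct drm_modeset_lock lock;
};

struct foo_bus_state {
	struct drm_private_state base;
	unsigned int bandwidth;
};

static struct foo_bus_state *
foo_get_bus_state(struct drm_atomic_state *state, struct foo_bus *bus)
{
	struct drm_private_state *priv;
	int ret;

	/* Grab the modeset lock protecting the shared resource first. */
	ret = drm_modeset_lock(&bus->lock, state->acquire_ctx);
	if (ret)
		return ERR_PTR(ret);

	priv = drm_atomic_get_private_obj_state(state, &bus->obj);
	if (IS_ERR(priv))
		return ERR_CAST(priv);

	return container_of(priv, struct foo_bus_state, base);
}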
+
+/**
* drm_atomic_private_obj_init - initialize private object
* @obj: private object
* @state: initial private object state
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 365901c1c33c..ddd537914575 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -4871,6 +4871,11 @@ EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode);
* @mode: DRM display mode
* @rgb_quant_range: RGB quantization range (Q)
* @rgb_quant_range_selectable: Sink support selectable RGB quantization range (QS)
+ * @is_hdmi2_sink: HDMI 2.0 sink, which has different default recommendations
+ *
+ * Note that @is_hdmi2_sink can be derived by looking at the
+ * &drm_scdc.supported flag stored in &drm_hdmi_info.scdc of
+ * &drm_display_info.hdmi, which can be found in &drm_connector.display_info.
*/
void
drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame,
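/*
 * Editor's sketch: deriving @is_hdmi2_sink as described in the note above.
 */
bool is_hdmi2_sink = connector->display_info.hdmi.scdc.supported;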
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 04a3a5ce370a..035784ddd133 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -66,19 +66,23 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
* helper functions used by many drivers to implement the kernel mode setting
* interfaces.
*
- * Initialization is done as a four-step process with drm_fb_helper_prepare(),
- * drm_fb_helper_init(), drm_fb_helper_single_add_all_connectors() and
- * drm_fb_helper_initial_config(). Drivers with fancier requirements than the
- * default behaviour can override the third step with their own code.
- * Teardown is done with drm_fb_helper_fini() after the fbdev device is
- * unregisters using drm_fb_helper_unregister_fbi().
- *
- * At runtime drivers should restore the fbdev console by calling
- * drm_fb_helper_restore_fbdev_mode_unlocked() from their &drm_driver.lastclose
- * callback. They should also notify the fb helper code from updates to the
- * output configuration by calling drm_fb_helper_hotplug_event(). For easier
- * integration with the output polling code in drm_crtc_helper.c the modeset
- * code provides a &drm_mode_config_funcs.output_poll_changed callback.
+ * Set up fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it
+ * down by calling drm_fb_helper_fbdev_teardown().
+ *
+ * Drivers that need to handle connector hotplugging (e.g. DP MST) can't use
+ * the setup helper and will need to do the whole four-step setup process
+ * themselves: drm_fb_helper_prepare(), drm_fb_helper_init(),
+ * drm_fb_helper_single_add_all_connectors(), enable hotplugging, then
+ * drm_fb_helper_initial_config(), to avoid a possible race window (see the
+ * sketch below).
+ *
+ * At runtime drivers should restore the fbdev console by using
+ * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback.
+ * They should also notify the fb helper code of updates to the output
+ * configuration by using drm_fb_helper_output_poll_changed() as their
+ * &drm_mode_config_funcs.output_poll_changed callback.
+ *
+ * For suspend/resume consider using drm_mode_config_helper_suspend() and
+ * drm_mode_config_helper_resume(), which take care of fbdev as well.
*
* All other functions exported by the fb helper library can be used to
* implement the fbdev driver interface by the driver.
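/*
 * Editor's sketch of the four-step setup described above, for a driver
 * that must enable hotplug handling itself (foo_fb_helper_funcs,
 * max_conn_count and preferred_bpp are illustrative):
 */
drm_fb_helper_prepare(dev, fb_helper, &foo_fb_helper_funcs);

ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
if (ret)
	return ret;

ret = drm_fb_helper_single_add_all_connectors(fb_helper);
if (ret)
	goto err_fini;

/* ... enable hotplug handling (e.g. DP MST) here ... */

ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp);
if (ret)
	goto err_fini;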
@@ -103,7 +107,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
* always run in process context since the fb_*() function could be running in
* atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io
* callback it will also schedule dirty_work with the damage collected from the
- * mmap page writes.
+ * mmap page writes. Drivers can use drm_fb_helper_defio_init() to set up
+ * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()).
*/
#define drm_fb_helper_for_each_connector(fbh, i__) \
@@ -1025,6 +1030,49 @@ void drm_fb_helper_deferred_io(struct fb_info *info,
EXPORT_SYMBOL(drm_fb_helper_deferred_io);
/**
+ * drm_fb_helper_defio_init - fbdev deferred I/O initialization
+ * @fb_helper: driver-allocated fbdev helper
+ *
+ * This function allocates &fb_deferred_io, sets the callback to
+ * drm_fb_helper_deferred_io() and the delay to 50ms, and calls
+ * fb_deferred_io_init().
+ * It should be called from the &drm_fb_helper_funcs->fb_probe callback.
+ * drm_fb_helper_fbdev_teardown() cleans up deferred I/O.
+ *
+ * NOTE: A copy of &fb_ops is made and assigned to &info->fbops. This is done
+ * because fb_deferred_io_cleanup() clears &fbops->fb_mmap and would thereby
+ * affect other instances of that &fb_ops.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
+{
+ struct fb_info *info = fb_helper->fbdev;
+ struct fb_deferred_io *fbdefio;
+ struct fb_ops *fbops;
+
+ fbdefio = kzalloc(sizeof(*fbdefio), GFP_KERNEL);
+ fbops = kzalloc(sizeof(*fbops), GFP_KERNEL);
+ if (!fbdefio || !fbops) {
+ kfree(fbdefio);
+ kfree(fbops);
+ return -ENOMEM;
+ }
+
+ info->fbdefio = fbdefio;
+ fbdefio->delay = msecs_to_jiffies(50);
+ fbdefio->deferred_io = drm_fb_helper_deferred_io;
+
+ *fbops = *info->fbops;
+ info->fbops = fbops;
+
+ fb_deferred_io_init(info);
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_fb_helper_defio_init);
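/*
 * Editor's sketch: calling drm_fb_helper_defio_init() from a driver's
 * &drm_fb_helper_funcs.fb_probe implementation (allocation of the fb and
 * fb_info is elided; foo_fb_probe is an illustrative name):
 */
static int foo_fb_probe(struct drm_fb_helper *helper,
			struct drm_fb_helper_surface_size *sizes)
{
	int ret;

	/* ... allocate the framebuffer and fb_info as usual ... */

	ret = drm_fb_helper_defio_init(helper);
	if (ret)
		return ret;

	return 0;
}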
+
+/**
* drm_fb_helper_sys_read - wrapper around fb_sys_read
* @info: fb_info struct pointer
* @buf: userspace buffer to read from framebuffer memory
@@ -1848,6 +1896,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
if (ret < 0)
return ret;
+ strcpy(fb_helper->fb->comm, "[fbcon]");
return 0;
}
@@ -2730,6 +2779,120 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
/**
+ * drm_fb_helper_fbdev_setup() - Setup fbdev emulation
+ * @dev: DRM device
+ * @fb_helper: fbdev helper structure to set up
+ * @funcs: fbdev helper functions
+ * @preferred_bpp: Preferred bits per pixel for the device.
+ * @dev->mode_config.preferred_depth is used if this is zero.
+ * @max_conn_count: Maximum number of connectors.
+ * @dev->mode_config.num_connector is used if this is zero.
+ *
+ * This function sets up fbdev emulation and registers fbdev for access by
+ * userspace. If all connectors are disconnected, setup is deferred to the next
+ * time drm_fb_helper_hotplug_event() is called.
+ * The caller must provide a &drm_fb_helper_funcs->fb_probe callback
+ * function.
+ *
+ * See also: drm_fb_helper_initial_config()
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_fb_helper_fbdev_setup(struct drm_device *dev,
+ struct drm_fb_helper *fb_helper,
+ const struct drm_fb_helper_funcs *funcs,
+ unsigned int preferred_bpp,
+ unsigned int max_conn_count)
+{
+ int ret;
+
+ if (!preferred_bpp)
+ preferred_bpp = dev->mode_config.preferred_depth;
+ if (!preferred_bpp)
+ preferred_bpp = 32;
+
+ if (!max_conn_count)
+ max_conn_count = dev->mode_config.num_connector;
+ if (!max_conn_count) {
+ DRM_DEV_ERROR(dev->dev, "No connectors\n");
+ return -EINVAL;
+ }
+
+ drm_fb_helper_prepare(dev, fb_helper, funcs);
+
+ ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper\n");
+ return ret;
+ }
+
+ ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev->dev, "Failed to add connectors\n");
+ goto err_drm_fb_helper_fini;
+ }
+
+ if (!drm_drv_uses_atomic_modeset(dev))
+ drm_helper_disable_unused_functions(dev);
+
+ ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp);
+ if (ret < 0) {
+ DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration\n");
+ goto err_drm_fb_helper_fini;
+ }
+
+ return 0;
+
+err_drm_fb_helper_fini:
+ drm_fb_helper_fini(fb_helper);
+
+ return ret;
+}
+EXPORT_SYMBOL(drm_fb_helper_fbdev_setup);
+
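As a usage sketch (hypothetical foo_* names), a driver that can live with the defaults only has to provide an .fb_probe callback and one call at load time:

static const struct drm_fb_helper_funcs foo_fbdev_funcs = {
	.fb_probe = foo_fb_probe,
};

static int foo_fbdev_init(struct drm_device *dev)
{
	struct drm_fb_helper *fb_helper;

	fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
	if (!fb_helper)
		return -ENOMEM;

	/* 0/0: fall back to preferred_depth and num_connector */
	return drm_fb_helper_fbdev_setup(dev, fb_helper, &foo_fbdev_funcs,
					 0, 0);
}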
+/**
+ * drm_fb_helper_fbdev_teardown - Tear down fbdev emulation
+ * @dev: DRM device
+ *
+ * This function unregisters fbdev if not already done and cleans up the
+ * associated resources including the &drm_framebuffer.
+ * The driver is responsible for freeing the &drm_fb_helper structure which is
+ * stored in &drm_device->fb_helper. Note that this pointer is cleared by the
+ * time this function returns.
+ *
+ * In order to support device removal/unplug while file handles are still open,
+ * drm_fb_helper_unregister_fbi() should be called on device removal and
+ * drm_fb_helper_fbdev_teardown() in the &drm_driver->release callback once
+ * all file handles are closed.
+ */
+void drm_fb_helper_fbdev_teardown(struct drm_device *dev)
+{
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+ struct fb_ops *fbops = NULL;
+
+ if (!fb_helper)
+ return;
+
+ /* Unregister if it hasn't been done already */
+ if (fb_helper->fbdev && fb_helper->fbdev->dev)
+ drm_fb_helper_unregister_fbi(fb_helper);
+
+ if (fb_helper->fbdev && fb_helper->fbdev->fbdefio) {
+ fb_deferred_io_cleanup(fb_helper->fbdev);
+ kfree(fb_helper->fbdev->fbdefio);
+ fbops = fb_helper->fbdev->fbops;
+ }
+
+ drm_fb_helper_fini(fb_helper);
+ kfree(fbops);
+
+ if (fb_helper->fb)
+ drm_framebuffer_remove(fb_helper->fb);
+}
+EXPORT_SYMBOL(drm_fb_helper_fbdev_teardown);
+
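For the unplug-safe split described above, a hedged sketch of the two halves (foo_* names are assumptions):

static void foo_unplug(struct drm_device *dev)
{
	/* cut userspace access right away, keep resources for open files */
	drm_fb_helper_unregister_fbi(dev->fb_helper);
	drm_dev_unregister(dev);
}

static void foo_release(struct drm_device *dev)	/* &drm_driver.release */
{
	struct drm_fb_helper *fb_helper = dev->fb_helper;

	/* also removes the framebuffer and clears dev->fb_helper */
	drm_fb_helper_fbdev_teardown(dev);
	kfree(fb_helper);
}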
+/**
* drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation
* @dev: DRM device
*
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index d63d4c2ac4c8..5a13ff29f4f0 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -664,6 +664,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
INIT_LIST_HEAD(&fb->filp_head);
fb->funcs = funcs;
+ strcpy(fb->comm, current->comm);
ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
false, drm_framebuffer_free);
@@ -978,6 +979,7 @@ void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
struct drm_format_name_buf format_name;
unsigned int i;
+ drm_printf_indent(p, indent, "allocated by = %s\n", fb->comm);
drm_printf_indent(p, indent, "refcount=%u\n",
drm_framebuffer_read_refcount(fb));
drm_printf_indent(p, indent, "format=%s\n",
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 9b733c510cbf..131695915acd 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -29,9 +29,9 @@
/**
* DOC: Overview
*
- * DRM synchronisation objects (syncobj) are a persistent objects,
- * that contain an optional fence. The fence can be updated with a new
- * fence, or be NULL.
+ * DRM synchronisation objects (syncobj, see struct &drm_syncobj) are
+ * persistent objects that contain an optional fence. The fence can be updated
+ * with a new fence, or be NULL.
*
* syncobj's can be waited upon, where it will wait for the underlying
* fence.
@@ -61,7 +61,8 @@
* @file_private: drm file private pointer
* @handle: sync object handle to lookup.
*
- * Returns a reference to the syncobj pointed to by handle or NULL.
+ * Returns a reference to the syncobj pointed to by handle or NULL. The
+ * reference must be released by calling drm_syncobj_put().
*/
struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
u32 handle)
@@ -229,6 +230,19 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
return 0;
}
+/**
+ * drm_syncobj_find_fence - lookup and reference the fence in a sync object
+ * @file_private: drm file private pointer
+ * @handle: sync object handle to lookup.
+ * @fence: out parameter for the fence
+ *
+ * This is just a convenience function that combines drm_syncobj_find() and
+ * drm_syncobj_fence_get().
+ *
+ * Returns 0 on success or a negative error value on failure. On success @fence
+ * contains a reference to the fence, which must be released by calling
+ * dma_fence_put().
+ */
int drm_syncobj_find_fence(struct drm_file *file_private,
u32 handle,
struct dma_fence **fence)
@@ -269,6 +283,12 @@ EXPORT_SYMBOL(drm_syncobj_free);
* @out_syncobj: returned syncobj
* @flags: DRM_SYNCOBJ_* flags
* @fence: if non-NULL, the syncobj will represent this fence
+ *
+ * This creates a new sync object. After creating it, drivers will usually
+ * want to make it available to userspace, either through
+ * drm_syncobj_get_handle() or drm_syncobj_get_fd().
+ *
+ * Returns 0 on success or a negative error value on failure.
*/
int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
struct dma_fence *fence)
@@ -302,6 +322,14 @@ EXPORT_SYMBOL(drm_syncobj_create);
/**
* drm_syncobj_get_handle - get a handle from a syncobj
+ * @file_private: drm file private pointer
+ * @syncobj: Sync object to export
+ * @handle: out parameter with the new handle
+ *
+ * Exports a sync object created with drm_syncobj_create() as a handle on
+ * @file_private to userspace.
+ *
+ * Returns 0 on success or a negative error value on failure.
*/
int drm_syncobj_get_handle(struct drm_file *file_private,
struct drm_syncobj *syncobj, u32 *handle)
@@ -388,6 +416,15 @@ static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
return 0;
}
+/**
+ * drm_syncobj_get_fd - get a file descriptor from a syncobj
+ * @syncobj: Sync object to export
+ * @p_fd: out parameter with the new file descriptor
+ *
+ * Exports a sync object created with drm_syncobj_create() as a file descriptor.
+ *
+ * Returns 0 on success or a negative error value on failure.
+ */
int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
{
int ret;
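Read together, the new kerneldoc describes a small lifecycle; a sketch of a driver exporting a fence to userspace, assuming @file_private and @fence are already at hand (foo_export_fence() is illustrative):

static int foo_export_fence(struct drm_file *file_private,
			    struct dma_fence *fence, u32 *handle)
{
	struct drm_syncobj *syncobj;
	int ret;

	ret = drm_syncobj_create(&syncobj, 0, fence);
	if (ret)
		return ret;

	ret = drm_syncobj_get_handle(file_private, syncobj, handle);
	drm_syncobj_put(syncobj);	/* the handle now holds the reference */

	return ret;
}

Looking the fence up again later goes through drm_syncobj_find_fence(file_private, *handle, &fence), and the returned reference is dropped with dma_fence_put().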
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index a29b8f59eb15..3f58b4077767 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -6,6 +6,7 @@ config DRM_ETNAVIV
depends on MMU
select SHMEM
select SYNC_FILE
+ select THERMAL if DRM_ETNAVIV_THERMAL
select TMPFS
select WANT_DEV_COREDUMP
select CMA if HAVE_DMA_CONTIGUOUS
@@ -13,6 +14,14 @@ config DRM_ETNAVIV
help
DRM driver for Vivante GPUs.
+config DRM_ETNAVIV_THERMAL
+ bool "enable ETNAVIV thermal throttling"
+ depends on DRM_ETNAVIV
+ default y
+ help
+ Compile in support for thermal throttling.
+ Say Y unless you want to risk burning your SoC.
+
config DRM_ETNAVIV_REGISTER_LOGGING
bool "enable ETNAVIV register logging"
depends on DRM_ETNAVIV
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 9e7098e3207f..99ad2f073c6e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -100,6 +100,8 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
{
u32 flush = 0;
+ lockdep_assert_held(&gpu->lock);
+
/*
* This assumes that if we're switching to 2D, we're switching
* away from 3D, and vice versa. Hence, if we're switching to
@@ -164,7 +166,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
{
- struct etnaviv_cmdbuf *buffer = gpu->buffer;
+ struct etnaviv_cmdbuf *buffer = &gpu->buffer;
+
+ lockdep_assert_held(&gpu->lock);
/* initialize buffer */
buffer->user_size = 0;
@@ -178,7 +182,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
{
- struct etnaviv_cmdbuf *buffer = gpu->buffer;
+ struct etnaviv_cmdbuf *buffer = &gpu->buffer;
+
+ lockdep_assert_held(&gpu->lock);
buffer->user_size = 0;
@@ -211,10 +217,12 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe
void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
{
- struct etnaviv_cmdbuf *buffer = gpu->buffer;
+ struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 link_target, flush = 0;
+ lockdep_assert_held(&gpu->lock);
+
if (gpu->exec_state == ETNA_PIPE_2D)
flush = VIVS_GL_FLUSH_CACHE_PE2D;
else if (gpu->exec_state == ETNA_PIPE_3D)
@@ -253,10 +261,12 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
/* Append a 'sync point' to the ring buffer. */
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
{
- struct etnaviv_cmdbuf *buffer = gpu->buffer;
+ struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 dwords, target;
+ lockdep_assert_held(&gpu->lock);
+
/*
* We need at most 3 dwords in the return target:
* 1 event + 1 end + 1 wait + 1 link.
@@ -287,13 +297,16 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
}
/* Append a command buffer to the ring buffer. */
-void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
- struct etnaviv_cmdbuf *cmdbuf)
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+ unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
{
- struct etnaviv_cmdbuf *buffer = gpu->buffer;
+ struct etnaviv_cmdbuf *buffer = &gpu->buffer;
unsigned int waitlink_offset = buffer->user_size - 16;
u32 return_target, return_dwords;
u32 link_target, link_dwords;
+ bool switch_context = gpu->exec_state != exec_state;
+
+ lockdep_assert_held(&gpu->lock);
if (drm_debug & DRM_UT_DRIVER)
etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
@@ -306,7 +319,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
* need to append a mmu flush load state, followed by a new
* link to this buffer - a total of four additional words.
*/
- if (gpu->mmu->need_flush || gpu->switch_context) {
+ if (gpu->mmu->need_flush || switch_context) {
u32 target, extra_dwords;
/* link command */
@@ -321,7 +334,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
}
/* pipe switch commands */
- if (gpu->switch_context)
+ if (switch_context)
extra_dwords += 4;
target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
@@ -349,10 +362,9 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
gpu->mmu->need_flush = false;
}
- if (gpu->switch_context) {
- etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state);
- gpu->exec_state = cmdbuf->exec_state;
- gpu->switch_context = false;
+ if (switch_context) {
+ etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
+ gpu->exec_state = exec_state;
}
/* And the link to the submitted buffer */
@@ -421,4 +433,6 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
if (drm_debug & DRM_UT_DRIVER)
etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
+
+ gpu->lastctx = cmdbuf->ctx;
}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
index 6e3bbcf24160..68e6d3772ad8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -78,6 +78,7 @@ static const struct {
ST(0x17c0, 8),
ST(0x17e0, 8),
ST(0x2400, 14 * 16),
+ ST(0x3824, 1),
ST(0x10800, 32 * 16),
ST(0x14600, 16),
ST(0x14800, 8 * 8),
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index 66ac79558bbd..3746827f45eb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -86,26 +86,11 @@ void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
kfree(suballoc);
}
-struct etnaviv_cmdbuf *
-etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
- size_t nr_bos, size_t nr_pmrs)
+int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
+ struct etnaviv_cmdbuf *cmdbuf, u32 size)
{
- struct etnaviv_cmdbuf *cmdbuf;
- struct etnaviv_perfmon_request *pmrs;
- size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
- sizeof(*cmdbuf));
int granule_offs, order, ret;
- cmdbuf = kzalloc(sz, GFP_KERNEL);
- if (!cmdbuf)
- return NULL;
-
- sz = sizeof(*pmrs) * nr_pmrs;
- pmrs = kzalloc(sz, GFP_KERNEL);
- if (!pmrs)
- goto out_free_cmdbuf;
-
- cmdbuf->pmrs = pmrs;
cmdbuf->suballoc = suballoc;
cmdbuf->size = size;
@@ -123,7 +108,7 @@ retry:
if (!ret) {
dev_err(suballoc->gpu->dev,
"Timeout waiting for cmdbuf space\n");
- return NULL;
+ return -ETIMEDOUT;
}
goto retry;
}
@@ -131,11 +116,7 @@ retry:
cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
- return cmdbuf;
-
-out_free_cmdbuf:
- kfree(cmdbuf);
- return NULL;
+ return 0;
}
void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
@@ -151,8 +132,6 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
suballoc->free_space = 1;
mutex_unlock(&suballoc->lock);
wake_up_all(&suballoc->free_event);
- kfree(cmdbuf->pmrs);
- kfree(cmdbuf);
}
u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
index b6348b9f2a9d..ddc3f7ea169c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -33,27 +33,15 @@ struct etnaviv_cmdbuf {
void *vaddr;
u32 size;
u32 user_size;
- /* fence after which this buffer is to be disposed */
- struct dma_fence *fence;
- /* target exec state */
- u32 exec_state;
- /* per GPU in-flight list */
- struct list_head node;
- /* perfmon requests */
- unsigned int nr_pmrs;
- struct etnaviv_perfmon_request *pmrs;
- /* BOs attached to this command buffer */
- unsigned int nr_bos;
- struct etnaviv_vram_mapping *bo_map[0];
};
struct etnaviv_cmdbuf_suballoc *
etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
-struct etnaviv_cmdbuf *
-etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
- size_t nr_bos, size_t nr_pmrs);
+
+int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
+ struct etnaviv_cmdbuf *cmdbuf, u32 size);
void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
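The switch from etnaviv_cmdbuf_new() to etnaviv_cmdbuf_init() means command buffers are now embedded in their owning structure instead of being allocated separately; a minimal sketch of the new calling convention (struct foo_owner is illustrative):

struct foo_owner {
	struct etnaviv_cmdbuf cmdbuf;	/* embedded, no separate kzalloc */
};

static int foo_owner_init(struct foo_owner *owner,
			  struct etnaviv_cmdbuf_suballoc *suballoc)
{
	/* fills in suballoc, size, vaddr and the suballocation offset */
	return etnaviv_cmdbuf_init(suballoc, &owner->cmdbuf, PAGE_SIZE);
}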
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 491eddf9b150..6faf4042db23 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -172,7 +172,7 @@ static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m)
{
- struct etnaviv_cmdbuf *buf = gpu->buffer;
+ struct etnaviv_cmdbuf *buf = &gpu->buffer;
u32 size = buf->size;
u32 *ptr = buf->vaddr;
u32 i;
@@ -459,9 +459,6 @@ static int etnaviv_ioctl_pm_query_dom(struct drm_device *dev, void *data,
struct drm_etnaviv_pm_domain *args = data;
struct etnaviv_gpu *gpu;
- /* reject as long as the feature isn't stable */
- return -EINVAL;
-
if (args->pipe >= ETNA_MAX_PIPES)
return -EINVAL;
@@ -479,9 +476,6 @@ static int etnaviv_ioctl_pm_query_sig(struct drm_device *dev, void *data,
struct drm_etnaviv_pm_signal *args = data;
struct etnaviv_gpu *gpu;
- /* reject as long as the feature isn't stable */
- return -EINVAL;
-
if (args->pipe >= ETNA_MAX_PIPES)
return -EINVAL;
@@ -556,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = {
.desc = "etnaviv DRM",
.date = "20151214",
.major = 1,
- .minor = 1,
+ .minor = 2,
};
/*
@@ -580,12 +574,6 @@ static int etnaviv_bind(struct device *dev)
}
drm->dev_private = priv;
- priv->wq = alloc_ordered_workqueue("etnaviv", 0);
- if (!priv->wq) {
- ret = -ENOMEM;
- goto out_wq;
- }
-
mutex_init(&priv->gem_lock);
INIT_LIST_HEAD(&priv->gem_list);
priv->num_gpus = 0;
@@ -607,9 +595,6 @@ static int etnaviv_bind(struct device *dev)
out_register:
component_unbind_all(dev, drm);
out_bind:
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
-out_wq:
kfree(priv);
out_unref:
drm_dev_unref(drm);
@@ -624,9 +609,6 @@ static void etnaviv_unbind(struct device *dev)
drm_dev_unregister(drm);
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
-
component_unbind_all(dev, drm);
drm->dev_private = NULL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d249acb6da08..a54f0b758a5c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -56,18 +56,8 @@ struct etnaviv_drm_private {
/* list of GEM objects: */
struct mutex gem_lock;
struct list_head gem_list;
-
- struct workqueue_struct *wq;
};
-static inline void etnaviv_queue_work(struct drm_device *dev,
- struct work_struct *w)
-{
- struct etnaviv_drm_private *priv = dev->dev_private;
-
- queue_work(priv->wq, w);
-}
-
int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
struct drm_file *file);
@@ -97,8 +87,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
-void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
- struct etnaviv_cmdbuf *cmdbuf);
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+ unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
void etnaviv_validate_init(void);
bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
u32 *stream, unsigned int size,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 2d955d7d7b6d..6d0909c589d1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -120,7 +120,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
struct core_dump_iterator iter;
struct etnaviv_vram_mapping *vram;
struct etnaviv_gem_object *obj;
- struct etnaviv_cmdbuf *cmd;
+ struct etnaviv_gem_submit *submit;
unsigned int n_obj, n_bomap_pages;
size_t file_size, mmu_size;
__le64 *bomap, *bomap_start;
@@ -132,11 +132,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
n_bomap_pages = 0;
file_size = ARRAY_SIZE(etnaviv_dump_registers) *
sizeof(struct etnaviv_dump_registers) +
- mmu_size + gpu->buffer->size;
+ mmu_size + gpu->buffer.size;
/* Add in the active command buffers */
- list_for_each_entry(cmd, &gpu->active_cmd_list, node) {
- file_size += cmd->size;
+ list_for_each_entry(submit, &gpu->active_submit_list, node) {
+ file_size += submit->cmdbuf.size;
n_obj++;
}
@@ -176,13 +176,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
etnaviv_core_dump_registers(&iter, gpu);
etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
- etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr,
- gpu->buffer->size,
- etnaviv_cmdbuf_get_va(gpu->buffer));
-
- list_for_each_entry(cmd, &gpu->active_cmd_list, node)
- etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr,
- cmd->size, etnaviv_cmdbuf_get_va(cmd));
+ etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
+ gpu->buffer.size,
+ etnaviv_cmdbuf_get_va(&gpu->buffer));
+
+ list_for_each_entry(submit, &gpu->active_submit_list, node)
+ etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
+ submit->cmdbuf.vaddr, submit->cmdbuf.size,
+ etnaviv_cmdbuf_get_va(&submit->cmdbuf));
/* Reserve space for the bomap */
if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index daee3f1196df..fcc969fa0e69 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -24,6 +24,9 @@
#include "etnaviv_gpu.h"
#include "etnaviv_mmu.h"
+static struct lock_class_key etnaviv_shm_lock_class;
+static struct lock_class_key etnaviv_userptr_lock_class;
+
static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj)
{
struct drm_device *dev = etnaviv_obj->base.dev;
@@ -583,7 +586,7 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj)
kfree(etnaviv_obj);
}
-int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
+void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
{
struct etnaviv_drm_private *priv = dev->dev_private;
struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
@@ -591,8 +594,6 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
mutex_lock(&priv->gem_lock);
list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list);
mutex_unlock(&priv->gem_lock);
-
- return 0;
}
static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags,
@@ -640,8 +641,9 @@ static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags,
return 0;
}
-static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
- u32 size, u32 flags)
+/* convenience method to construct a GEM buffer object and a userspace handle */
+int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+ u32 size, u32 flags, u32 *handle)
{
struct drm_gem_object *obj = NULL;
int ret;
@@ -653,6 +655,8 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
if (ret)
goto fail;
+ lockdep_set_class(&to_etnaviv_bo(obj)->lock, &etnaviv_shm_lock_class);
+
ret = drm_gem_object_init(dev, obj, size);
if (ret == 0) {
struct address_space *mapping;
@@ -660,7 +664,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
/*
* Our buffers are kept pinned, so allocating them
* from the MOVABLE zone is a really bad idea, and
- * conflicts with CMA. See coments above new_inode()
+ * conflicts with CMA. See comments above new_inode()
* why this is required _and_ expected if you're
* going to pin these pages.
*/
@@ -672,33 +676,12 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
if (ret)
goto fail;
- return obj;
-
-fail:
- drm_gem_object_put_unlocked(obj);
- return ERR_PTR(ret);
-}
-
-/* convenience method to construct a GEM buffer object, and userspace handle */
-int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
- u32 size, u32 flags, u32 *handle)
-{
- struct drm_gem_object *obj;
- int ret;
-
- obj = __etnaviv_gem_new(dev, size, flags);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- ret = etnaviv_gem_obj_add(dev, obj);
- if (ret < 0) {
- drm_gem_object_put_unlocked(obj);
- return ret;
- }
+ etnaviv_gem_obj_add(dev, obj);
ret = drm_gem_handle_create(file, obj, handle);
/* drop reference from allocate - handle holds it now */
+fail:
drm_gem_object_put_unlocked(obj);
return ret;
@@ -722,139 +705,41 @@ int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
return 0;
}
-struct get_pages_work {
- struct work_struct work;
- struct mm_struct *mm;
- struct task_struct *task;
- struct etnaviv_gem_object *etnaviv_obj;
-};
-
-static struct page **etnaviv_gem_userptr_do_get_pages(
- struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task)
-{
- int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
- struct page **pvec;
- uintptr_t ptr;
- unsigned int flags = 0;
-
- pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
- if (!pvec)
- return ERR_PTR(-ENOMEM);
-
- if (!etnaviv_obj->userptr.ro)
- flags |= FOLL_WRITE;
-
- pinned = 0;
- ptr = etnaviv_obj->userptr.ptr;
-
- down_read(&mm->mmap_sem);
- while (pinned < npages) {
- ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
- flags, pvec + pinned, NULL, NULL);
- if (ret < 0)
- break;
-
- ptr += ret * PAGE_SIZE;
- pinned += ret;
- }
- up_read(&mm->mmap_sem);
-
- if (ret < 0) {
- release_pages(pvec, pinned);
- kvfree(pvec);
- return ERR_PTR(ret);
- }
-
- return pvec;
-}
-
-static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work)
-{
- struct get_pages_work *work = container_of(_work, typeof(*work), work);
- struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj;
- struct page **pvec;
-
- pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task);
-
- mutex_lock(&etnaviv_obj->lock);
- if (IS_ERR(pvec)) {
- etnaviv_obj->userptr.work = ERR_CAST(pvec);
- } else {
- etnaviv_obj->userptr.work = NULL;
- etnaviv_obj->pages = pvec;
- }
-
- mutex_unlock(&etnaviv_obj->lock);
- drm_gem_object_put_unlocked(&etnaviv_obj->base);
-
- mmput(work->mm);
- put_task_struct(work->task);
- kfree(work);
-}
-
static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
{
struct page **pvec = NULL;
- struct get_pages_work *work;
- struct mm_struct *mm;
- int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
-
- if (etnaviv_obj->userptr.work) {
- if (IS_ERR(etnaviv_obj->userptr.work)) {
- ret = PTR_ERR(etnaviv_obj->userptr.work);
- etnaviv_obj->userptr.work = NULL;
- } else {
- ret = -EAGAIN;
- }
- return ret;
- }
+ struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
+ int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
- mm = get_task_mm(etnaviv_obj->userptr.task);
- pinned = 0;
- if (mm == current->mm) {
- pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
- if (!pvec) {
- mmput(mm);
- return -ENOMEM;
- }
-
- pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages,
- !etnaviv_obj->userptr.ro, pvec);
- if (pinned < 0) {
- kvfree(pvec);
- mmput(mm);
- return pinned;
- }
-
- if (pinned == npages) {
- etnaviv_obj->pages = pvec;
- mmput(mm);
- return 0;
- }
- }
+ might_lock_read(&current->mm->mmap_sem);
- release_pages(pvec, pinned);
- kvfree(pvec);
+ if (userptr->mm != current->mm)
+ return -EPERM;
- work = kmalloc(sizeof(*work), GFP_KERNEL);
- if (!work) {
- mmput(mm);
+ pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+ if (!pvec)
return -ENOMEM;
- }
- get_task_struct(current);
- drm_gem_object_get(&etnaviv_obj->base);
+ do {
+ unsigned num_pages = npages - pinned;
+ uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
+ struct page **pages = pvec + pinned;
- work->mm = mm;
- work->task = current;
- work->etnaviv_obj = etnaviv_obj;
+ ret = get_user_pages_fast(ptr, num_pages,
+ !userptr->ro ? FOLL_WRITE : 0, pages);
+ if (ret < 0) {
+ release_pages(pvec, pinned);
+ kvfree(pvec);
+ return ret;
+ }
+
+ pinned += ret;
- etnaviv_obj->userptr.work = &work->work;
- INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages);
+ } while (pinned < npages);
- etnaviv_queue_work(etnaviv_obj->base.dev, &work->work);
+ etnaviv_obj->pages = pvec;
- return -EAGAIN;
+ return 0;
}
static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
@@ -870,7 +755,6 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
release_pages(etnaviv_obj->pages, npages);
kvfree(etnaviv_obj->pages);
}
- put_task_struct(etnaviv_obj->userptr.task);
}
static int etnaviv_gem_userptr_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
@@ -897,17 +781,16 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
if (ret)
return ret;
+ lockdep_set_class(&etnaviv_obj->lock, &etnaviv_userptr_lock_class);
+
etnaviv_obj->userptr.ptr = ptr;
- etnaviv_obj->userptr.task = current;
+ etnaviv_obj->userptr.mm = current->mm;
etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE);
- get_task_struct(current);
- ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
- if (ret)
- goto unreference;
+ etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle);
-unreference:
+
/* drop reference from allocate - handle holds it now */
drm_gem_object_put_unlocked(&etnaviv_obj->base);
return ret;
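With the worker gone, userptr page pinning now happens synchronously in the caller's context, and only the creating mm may trigger it. Condensed from the hunk above into a standalone helper (foo_pin_user_pages() is illustrative):

static int foo_pin_user_pages(unsigned long ptr, unsigned int npages,
			      bool write, struct page **pvec)
{
	unsigned int pinned = 0;

	do {
		int ret = get_user_pages_fast(ptr + pinned * PAGE_SIZE,
					      npages - pinned,
					      write ? FOLL_WRITE : 0,
					      pvec + pinned);
		if (ret < 0) {
			release_pages(pvec, pinned);
			return ret;
		}
		pinned += ret;
	} while (pinned < npages);

	return 0;
}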
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index e437fba1209d..be72a9833f2b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -18,6 +18,7 @@
#define __ETNAVIV_GEM_H__
#include <linux/reservation.h>
+#include "etnaviv_cmdbuf.h"
#include "etnaviv_drv.h"
struct dma_fence;
@@ -26,8 +27,7 @@ struct etnaviv_gem_object;
struct etnaviv_gem_userptr {
uintptr_t ptr;
- struct task_struct *task;
- struct work_struct *work;
+ struct mm_struct *mm;
bool ro;
};
@@ -98,26 +98,32 @@ struct etnaviv_gem_submit_bo {
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
* associated with the cmdstream submission for synchronization (and
- * make it easier to unwind when things go wrong, etc). This only
- * lasts for the duration of the submit-ioctl.
+ * make it easier to unwind when things go wrong, etc).
*/
struct etnaviv_gem_submit {
- struct drm_device *dev;
+ struct kref refcount;
struct etnaviv_gpu *gpu;
- struct ww_acquire_ctx ticket;
- struct dma_fence *fence;
+ struct dma_fence *out_fence, *in_fence;
+ struct list_head node; /* GPU active submit list */
+ struct etnaviv_cmdbuf cmdbuf;
+ bool runtime_resumed;
+ u32 exec_state;
u32 flags;
+ unsigned int nr_pmrs;
+ struct etnaviv_perfmon_request *pmrs;
unsigned int nr_bos;
struct etnaviv_gem_submit_bo bos[0];
/* No new members here, the previous one is variable-length! */
};
+void etnaviv_submit_put(struct etnaviv_gem_submit *submit);
+
int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
struct timespec *timeout);
int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
struct etnaviv_gem_object **res);
-int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
+void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
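The refcounted submit replaces the old single-owner cmdbuf: every place that stores a submit pointer takes a reference first and drops it when done. The pattern, condensed from the gpu.c hunks below:

/* producer side: publish the submit to an event slot */
kref_get(&submit->refcount);
gpu->event[event].submit = submit;

/* consumer side: done with the event, drop that reference */
etnaviv_submit_put(gpu->event[event].submit);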
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index ae884723e9b1..5704305d41e6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -19,6 +19,7 @@
#include "etnaviv_drv.h"
#include "etnaviv_gem.h"
+static struct lock_class_key etnaviv_prime_lock_class;
struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
@@ -125,6 +126,8 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
if (ret < 0)
return ERR_PTR(ret);
+ lockdep_set_class(&etnaviv_obj->lock, &etnaviv_prime_lock_class);
+
npages = size / PAGE_SIZE;
etnaviv_obj->sgt = sgt;
@@ -139,9 +142,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
goto fail;
- ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
- if (ret)
- goto fail;
+ etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
return &etnaviv_obj->base;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index ff911541a190..1f8202bca061 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -33,22 +33,25 @@
#define BO_PINNED 0x2000
static struct etnaviv_gem_submit *submit_create(struct drm_device *dev,
- struct etnaviv_gpu *gpu, size_t nr)
+ struct etnaviv_gpu *gpu, size_t nr_bos, size_t nr_pmrs)
{
struct etnaviv_gem_submit *submit;
- size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit));
+ size_t sz = size_vstruct(nr_bos, sizeof(submit->bos[0]), sizeof(*submit));
- submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
- if (submit) {
- submit->dev = dev;
- submit->gpu = gpu;
+ submit = kzalloc(sz, GFP_KERNEL);
+ if (!submit)
+ return NULL;
- /* initially, until copy_from_user() and bo lookup succeeds: */
- submit->nr_bos = 0;
- submit->fence = NULL;
-
- ww_acquire_init(&submit->ticket, &reservation_ww_class);
+ submit->pmrs = kcalloc(nr_pmrs, sizeof(struct etnaviv_perfmon_request),
+ GFP_KERNEL);
+ if (!submit->pmrs) {
+ kfree(submit);
+ return NULL;
}
+ submit->nr_pmrs = nr_pmrs;
+
+ submit->gpu = gpu;
+ kref_init(&submit->refcount);
return submit;
}
@@ -111,7 +114,8 @@ static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i)
}
}
-static int submit_lock_objects(struct etnaviv_gem_submit *submit)
+static int submit_lock_objects(struct etnaviv_gem_submit *submit,
+ struct ww_acquire_ctx *ticket)
{
int contended, slow_locked = -1, i, ret = 0;
@@ -126,7 +130,7 @@ retry:
if (!(submit->bos[i].flags & BO_LOCKED)) {
ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock,
- &submit->ticket);
+ ticket);
if (ret == -EALREADY)
DRM_ERROR("BO at index %u already on submit list\n",
i);
@@ -136,7 +140,7 @@ retry:
}
}
- ww_acquire_done(&submit->ticket);
+ ww_acquire_done(ticket);
return 0;
@@ -154,7 +158,7 @@ fail:
/* we lost out in a seqno race, lock and retry.. */
ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock,
- &submit->ticket);
+ ticket);
if (!ret) {
submit->bos[contended].flags |= BO_LOCKED;
slow_locked = contended;
@@ -181,19 +185,33 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
break;
}
+ if (submit->flags & ETNA_SUBMIT_FENCE_FD_IN) {
+ /*
+ * Wait if the fence is from a foreign context, or if the fence
+ * array contains any fence from a foreign context.
+ */
+ if (!dma_fence_match_context(submit->in_fence, context))
+ ret = dma_fence_wait(submit->in_fence, true);
+ }
+
return ret;
}
-static void submit_unpin_objects(struct etnaviv_gem_submit *submit)
+static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
{
int i;
for (i = 0; i < submit->nr_bos; i++) {
- if (submit->bos[i].flags & BO_PINNED)
- etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
+ struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+ if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
+ reservation_object_add_excl_fence(etnaviv_obj->resv,
+ submit->out_fence);
+ else
+ reservation_object_add_shared_fence(etnaviv_obj->resv,
+ submit->out_fence);
- submit->bos[i].mapping = NULL;
- submit->bos[i].flags &= ~BO_PINNED;
+ submit_unlock_object(submit, i);
}
}
@@ -211,6 +229,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
ret = PTR_ERR(mapping);
break;
}
+ atomic_inc(&etnaviv_obj->gpu_active);
submit->bos[i].flags |= BO_PINNED;
submit->bos[i].mapping = mapping;
@@ -285,13 +304,11 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
}
static int submit_perfmon_validate(struct etnaviv_gem_submit *submit,
- struct etnaviv_cmdbuf *cmdbuf,
- const struct drm_etnaviv_gem_submit_pmr *pmrs,
- u32 nr_pms)
+ u32 exec_state, const struct drm_etnaviv_gem_submit_pmr *pmrs)
{
u32 i;
- for (i = 0; i < nr_pms; i++) {
+ for (i = 0; i < submit->nr_pmrs; i++) {
const struct drm_etnaviv_gem_submit_pmr *r = pmrs + i;
struct etnaviv_gem_submit_bo *bo;
int ret;
@@ -316,39 +333,65 @@ static int submit_perfmon_validate(struct etnaviv_gem_submit *submit,
return -EINVAL;
}
- if (etnaviv_pm_req_validate(r, cmdbuf->exec_state)) {
+ if (etnaviv_pm_req_validate(r, exec_state)) {
DRM_ERROR("perfmon request: domain or signal not valid");
return -EINVAL;
}
- cmdbuf->pmrs[i].flags = r->flags;
- cmdbuf->pmrs[i].domain = r->domain;
- cmdbuf->pmrs[i].signal = r->signal;
- cmdbuf->pmrs[i].sequence = r->sequence;
- cmdbuf->pmrs[i].offset = r->read_offset;
- cmdbuf->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base);
+ submit->pmrs[i].flags = r->flags;
+ submit->pmrs[i].domain = r->domain;
+ submit->pmrs[i].signal = r->signal;
+ submit->pmrs[i].sequence = r->sequence;
+ submit->pmrs[i].offset = r->read_offset;
+ submit->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base);
}
return 0;
}
-static void submit_cleanup(struct etnaviv_gem_submit *submit)
+static void submit_cleanup(struct kref *kref)
{
+ struct etnaviv_gem_submit *submit =
+ container_of(kref, struct etnaviv_gem_submit, refcount);
unsigned i;
+ if (submit->runtime_resumed)
+ pm_runtime_put_autosuspend(submit->gpu->dev);
+
+ if (submit->cmdbuf.suballoc)
+ etnaviv_cmdbuf_free(&submit->cmdbuf);
+
for (i = 0; i < submit->nr_bos; i++) {
struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+ /* unpin all objects */
+ if (submit->bos[i].flags & BO_PINNED) {
+ etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
+ atomic_dec(&etnaviv_obj->gpu_active);
+ submit->bos[i].mapping = NULL;
+ submit->bos[i].flags &= ~BO_PINNED;
+ }
+
+ /* if the GPU submit failed, objects might still be locked */
submit_unlock_object(submit, i);
drm_gem_object_put_unlocked(&etnaviv_obj->base);
}
- ww_acquire_fini(&submit->ticket);
- if (submit->fence)
- dma_fence_put(submit->fence);
+ wake_up_all(&submit->gpu->fence_event);
+
+ if (submit->in_fence)
+ dma_fence_put(submit->in_fence);
+ if (submit->out_fence)
+ dma_fence_put(submit->out_fence);
+ kfree(submit->pmrs);
kfree(submit);
}
+void etnaviv_submit_put(struct etnaviv_gem_submit *submit)
+{
+ kref_put(&submit->refcount, submit_cleanup);
+}
+
int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -358,10 +401,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
struct drm_etnaviv_gem_submit_pmr *pmrs;
struct drm_etnaviv_gem_submit_bo *bos;
struct etnaviv_gem_submit *submit;
- struct etnaviv_cmdbuf *cmdbuf;
struct etnaviv_gpu *gpu;
- struct dma_fence *in_fence = NULL;
struct sync_file *sync_file = NULL;
+ struct ww_acquire_ctx ticket;
int out_fence_fd = -1;
void *stream;
int ret;
@@ -399,17 +441,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
relocs = kvmalloc_array(args->nr_relocs, sizeof(*relocs), GFP_KERNEL);
pmrs = kvmalloc_array(args->nr_pmrs, sizeof(*pmrs), GFP_KERNEL);
stream = kvmalloc_array(1, args->stream_size, GFP_KERNEL);
- cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
- ALIGN(args->stream_size, 8) + 8,
- args->nr_bos, args->nr_pmrs);
- if (!bos || !relocs || !pmrs || !stream || !cmdbuf) {
+ if (!bos || !relocs || !pmrs || !stream) {
ret = -ENOMEM;
goto err_submit_cmds;
}
- cmdbuf->exec_state = args->exec_state;
- cmdbuf->ctx = file->driver_priv;
-
ret = copy_from_user(bos, u64_to_user_ptr(args->bos),
args->nr_bos * sizeof(*bos));
if (ret) {
@@ -430,7 +466,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
ret = -EFAULT;
goto err_submit_cmds;
}
- cmdbuf->nr_pmrs = args->nr_pmrs;
ret = copy_from_user(stream, u64_to_user_ptr(args->stream),
args->stream_size);
@@ -447,19 +482,28 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
}
}
- submit = submit_create(dev, gpu, args->nr_bos);
+ ww_acquire_init(&ticket, &reservation_ww_class);
+
+ submit = submit_create(dev, gpu, args->nr_bos, args->nr_pmrs);
if (!submit) {
ret = -ENOMEM;
- goto err_submit_cmds;
+ goto err_submit_ww_acquire;
}
+ ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf,
+ ALIGN(args->stream_size, 8) + 8);
+ if (ret)
+ goto err_submit_objects;
+
+ submit->cmdbuf.ctx = file->driver_priv;
+ submit->exec_state = args->exec_state;
submit->flags = args->flags;
ret = submit_lookup_objects(submit, file, bos, args->nr_bos);
if (ret)
goto err_submit_objects;
- ret = submit_lock_objects(submit);
+ ret = submit_lock_objects(submit, &ticket);
if (ret)
goto err_submit_objects;
@@ -470,21 +514,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
}
if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) {
- in_fence = sync_file_get_fence(args->fence_fd);
- if (!in_fence) {
+ submit->in_fence = sync_file_get_fence(args->fence_fd);
+ if (!submit->in_fence) {
ret = -EINVAL;
goto err_submit_objects;
}
-
- /*
- * Wait if the fence is from a foreign context, or if the fence
- * array contains any fence from a foreign context.
- */
- if (!dma_fence_match_context(in_fence, gpu->fence_context)) {
- ret = dma_fence_wait(in_fence, true);
- if (ret)
- goto err_submit_objects;
- }
}
ret = submit_fence_sync(submit);
@@ -493,25 +527,25 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
ret = submit_pin_objects(submit);
if (ret)
- goto out;
+ goto err_submit_objects;
ret = submit_reloc(submit, stream, args->stream_size / 4,
relocs, args->nr_relocs);
if (ret)
- goto out;
+ goto err_submit_objects;
- ret = submit_perfmon_validate(submit, cmdbuf, pmrs, args->nr_pmrs);
+ ret = submit_perfmon_validate(submit, args->exec_state, pmrs);
if (ret)
- goto out;
+ goto err_submit_objects;
- memcpy(cmdbuf->vaddr, stream, args->stream_size);
- cmdbuf->user_size = ALIGN(args->stream_size, 8);
+ memcpy(submit->cmdbuf.vaddr, stream, args->stream_size);
+ submit->cmdbuf.user_size = ALIGN(args->stream_size, 8);
- ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
+ ret = etnaviv_gpu_submit(gpu, submit);
if (ret)
- goto out;
+ goto err_submit_objects;
- cmdbuf = NULL;
+ submit_attach_object_fences(submit);
if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
/*
@@ -520,39 +554,26 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
* fence to the sync file here, eliminating the ENOMEM
* possibility at this stage.
*/
- sync_file = sync_file_create(submit->fence);
+ sync_file = sync_file_create(submit->out_fence);
if (!sync_file) {
ret = -ENOMEM;
- goto out;
+ goto err_submit_objects;
}
fd_install(out_fence_fd, sync_file->file);
}
args->fence_fd = out_fence_fd;
- args->fence = submit->fence->seqno;
-
-out:
- submit_unpin_objects(submit);
-
- /*
- * If we're returning -EAGAIN, it may be due to the userptr code
- * wanting to run its workqueue outside of any locks. Flush our
- * workqueue to ensure that it is run in a timely manner.
- */
- if (ret == -EAGAIN)
- flush_workqueue(priv->wq);
+ args->fence = submit->out_fence->seqno;
err_submit_objects:
- if (in_fence)
- dma_fence_put(in_fence);
- submit_cleanup(submit);
+ etnaviv_submit_put(submit);
+
+err_submit_ww_acquire:
+ ww_acquire_fini(&ticket);
err_submit_cmds:
if (ret && (out_fence_fd >= 0))
put_unused_fd(out_fence_fd);
- /* if we still own the cmdbuf */
- if (cmdbuf)
- etnaviv_cmdbuf_free(cmdbuf);
if (stream)
kvfree(stream);
if (bos)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index e19cbe05da2a..21d0d22f1168 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -644,7 +644,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
prefetch = etnaviv_buffer_init(gpu);
gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
- etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer),
+ etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer),
prefetch);
}
@@ -717,15 +717,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
}
/* Create buffer: */
- gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0);
- if (!gpu->buffer) {
- ret = -ENOMEM;
+ ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer,
+ PAGE_SIZE);
+ if (ret) {
dev_err(gpu->dev, "could not create command buffer\n");
goto destroy_iommu;
}
if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
- etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) {
+ etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) {
ret = -EINVAL;
dev_err(gpu->dev,
"command buffer outside valid memory window\n");
@@ -751,8 +751,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
return 0;
free_buffer:
- etnaviv_cmdbuf_free(gpu->buffer);
- gpu->buffer = NULL;
+ etnaviv_cmdbuf_free(&gpu->buffer);
destroy_iommu:
etnaviv_iommu_destroy(gpu->mmu);
gpu->mmu = NULL;
@@ -958,7 +957,7 @@ static void recover_worker(struct work_struct *work)
pm_runtime_put_autosuspend(gpu->dev);
/* Retire the buffer objects in a work */
- etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+ queue_work(gpu->wq, &gpu->retire_work);
}
static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
@@ -994,7 +993,7 @@ static void hangcheck_handler(struct timer_list *t)
dev_err(gpu->dev, " completed fence: %u\n", fence);
dev_err(gpu->dev, " active fence: %u\n",
gpu->active_fence);
- etnaviv_queue_work(gpu->drm, &gpu->recover_work);
+ queue_work(gpu->wq, &gpu->recover_work);
}
/* if still more pending work, reset the hangcheck timer: */
@@ -1201,42 +1200,23 @@ static void retire_worker(struct work_struct *work)
struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
retire_work);
u32 fence = gpu->completed_fence;
- struct etnaviv_cmdbuf *cmdbuf, *tmp;
- unsigned int i;
+ struct etnaviv_gem_submit *submit, *tmp;
+ LIST_HEAD(retire_list);
mutex_lock(&gpu->lock);
- list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) {
- if (!dma_fence_is_signaled(cmdbuf->fence))
+ list_for_each_entry_safe(submit, tmp, &gpu->active_submit_list, node) {
+ if (!dma_fence_is_signaled(submit->out_fence))
break;
- list_del(&cmdbuf->node);
- dma_fence_put(cmdbuf->fence);
-
- for (i = 0; i < cmdbuf->nr_bos; i++) {
- struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
- struct etnaviv_gem_object *etnaviv_obj = mapping->object;
-
- atomic_dec(&etnaviv_obj->gpu_active);
- /* drop the refcount taken in etnaviv_gpu_submit */
- etnaviv_gem_mapping_unreference(mapping);
- }
-
- etnaviv_cmdbuf_free(cmdbuf);
- /*
- * We need to balance the runtime PM count caused by
- * each submission. Upon submission, we increment
- * the runtime PM counter, and allocate one event.
- * So here, we put the runtime PM count for each
- * completed event.
- */
- pm_runtime_put_autosuspend(gpu->dev);
+ list_move(&submit->node, &retire_list);
}
gpu->retired_fence = fence;
mutex_unlock(&gpu->lock);
- wake_up_all(&gpu->fence_event);
+ list_for_each_entry_safe(submit, tmp, &retire_list, node)
+ etnaviv_submit_put(submit);
}
int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
@@ -1295,41 +1275,25 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
ret = wait_event_interruptible_timeout(gpu->fence_event,
!is_active(etnaviv_obj),
remaining);
- if (ret > 0) {
- struct etnaviv_drm_private *priv = gpu->drm->dev_private;
-
- /* Synchronise with the retire worker */
- flush_workqueue(priv->wq);
+ if (ret > 0)
return 0;
- } else if (ret == -ERESTARTSYS) {
+ else if (ret == -ERESTARTSYS)
return -ERESTARTSYS;
- } else {
+ else
return -ETIMEDOUT;
- }
-}
-
-int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu)
-{
- return pm_runtime_get_sync(gpu->dev);
-}
-
-void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
-{
- pm_runtime_mark_last_busy(gpu->dev);
- pm_runtime_put_autosuspend(gpu->dev);
}
static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
struct etnaviv_event *event, unsigned int flags)
{
- const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+ const struct etnaviv_gem_submit *submit = event->submit;
unsigned int i;
- for (i = 0; i < cmdbuf->nr_pmrs; i++) {
- const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+ for (i = 0; i < submit->nr_pmrs; i++) {
+ const struct etnaviv_perfmon_request *pmr = submit->pmrs + i;
if (pmr->flags == flags)
- etnaviv_perfmon_process(gpu, pmr);
+ etnaviv_perfmon_process(gpu, pmr, submit->exec_state);
}
}
@@ -1354,14 +1318,14 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
struct etnaviv_event *event)
{
- const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+ const struct etnaviv_gem_submit *submit = event->submit;
unsigned int i;
u32 val;
sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
- for (i = 0; i < cmdbuf->nr_pmrs; i++) {
- const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+ for (i = 0; i < submit->nr_pmrs; i++) {
+ const struct etnaviv_perfmon_request *pmr = submit->pmrs + i;
*pmr->bo_vma = pmr->sequence;
}
@@ -1380,24 +1344,15 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
/* add bo's to gpu's ring, and kick gpu: */
int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
- struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
+ struct etnaviv_gem_submit *submit)
{
- struct dma_fence *fence;
unsigned int i, nr_events = 1, event[3];
int ret;
- ret = etnaviv_gpu_pm_get_sync(gpu);
+ ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0)
return ret;
-
- /*
- * TODO
- *
- * - flush
- * - data endian
- * - prefetch
- *
- */
+ submit->runtime_resumed = true;
/*
* if there are performance monitor requests we need to have
@@ -1406,19 +1361,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
* - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
* and update the sequence number for userspace.
*/
- if (cmdbuf->nr_pmrs)
+ if (submit->nr_pmrs)
nr_events = 3;
ret = event_alloc(gpu, nr_events, event);
if (ret) {
DRM_ERROR("no free events\n");
- goto out_pm_put;
+ return ret;
}
mutex_lock(&gpu->lock);
- fence = etnaviv_gpu_fence_alloc(gpu);
- if (!fence) {
+ submit->out_fence = etnaviv_gpu_fence_alloc(gpu);
+ if (!submit->out_fence) {
for (i = 0; i < nr_events; i++)
event_free(gpu, event[i]);
@@ -1426,80 +1381,51 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
goto out_unlock;
}
- gpu->event[event[0]].fence = fence;
- submit->fence = dma_fence_get(fence);
- gpu->active_fence = submit->fence->seqno;
+ gpu->active_fence = submit->out_fence->seqno;
- if (gpu->lastctx != cmdbuf->ctx) {
- gpu->mmu->need_flush = true;
- gpu->switch_context = true;
- gpu->lastctx = cmdbuf->ctx;
- }
-
- if (cmdbuf->nr_pmrs) {
+ if (submit->nr_pmrs) {
gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
- gpu->event[event[1]].cmdbuf = cmdbuf;
+ kref_get(&submit->refcount);
+ gpu->event[event[1]].submit = submit;
etnaviv_sync_point_queue(gpu, event[1]);
}
- etnaviv_buffer_queue(gpu, event[0], cmdbuf);
+ kref_get(&submit->refcount);
+ gpu->event[event[0]].fence = submit->out_fence;
+ etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
+ &submit->cmdbuf);
- if (cmdbuf->nr_pmrs) {
+ if (submit->nr_pmrs) {
gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
- gpu->event[event[2]].cmdbuf = cmdbuf;
+ kref_get(&submit->refcount);
+ gpu->event[event[2]].submit = submit;
etnaviv_sync_point_queue(gpu, event[2]);
}
- cmdbuf->fence = fence;
- list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
-
- /* We're committed to adding this command buffer, hold a PM reference */
- pm_runtime_get_noresume(gpu->dev);
-
- for (i = 0; i < submit->nr_bos; i++) {
- struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+ list_add_tail(&submit->node, &gpu->active_submit_list);
- /* Each cmdbuf takes a refcount on the mapping */
- etnaviv_gem_mapping_reference(submit->bos[i].mapping);
- cmdbuf->bo_map[i] = submit->bos[i].mapping;
- atomic_inc(&etnaviv_obj->gpu_active);
-
- if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
- reservation_object_add_excl_fence(etnaviv_obj->resv,
- fence);
- else
- reservation_object_add_shared_fence(etnaviv_obj->resv,
- fence);
- }
- cmdbuf->nr_bos = submit->nr_bos;
hangcheck_timer_reset(gpu);
ret = 0;
out_unlock:
mutex_unlock(&gpu->lock);
-out_pm_put:
- etnaviv_gpu_pm_put(gpu);
-
return ret;
}
-static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
- struct etnaviv_event *event)
-{
- u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
-
- event->sync_point(gpu, event);
- etnaviv_gpu_start_fe(gpu, addr + 2, 2);
-}
-
static void sync_point_worker(struct work_struct *work)
{
struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
sync_point_work);
+ struct etnaviv_event *event = &gpu->event[gpu->sync_point_event];
+ u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
- etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]);
+ event->sync_point(gpu, event);
+ etnaviv_submit_put(event->submit);
event_free(gpu, gpu->sync_point_event);
+
+ /* restart FE last to avoid GPU and IRQ racing against this worker */
+ etnaviv_gpu_start_fe(gpu, addr + 2, 2);
}
/*
@@ -1550,7 +1476,7 @@ static irqreturn_t irq_handler(int irq, void *data)
if (gpu->event[event].sync_point) {
gpu->sync_point_event = event;
- etnaviv_queue_work(gpu->drm, &gpu->sync_point_work);
+ queue_work(gpu->wq, &gpu->sync_point_work);
}
fence = gpu->event[event].fence;
@@ -1576,7 +1502,7 @@ static irqreturn_t irq_handler(int irq, void *data)
}
/* Retire the buffer objects in a work */
- etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+ queue_work(gpu->wq, &gpu->retire_work);
ret = IRQ_HANDLED;
}
@@ -1653,9 +1579,11 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)
static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
{
- if (gpu->buffer) {
+ if (gpu->buffer.suballoc) {
/* Replace the last WAIT with END */
+ mutex_lock(&gpu->lock);
etnaviv_buffer_end(gpu);
+ mutex_unlock(&gpu->lock);
/*
* We know that only the FE is busy here, this should
@@ -1680,7 +1608,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
etnaviv_gpu_update_clock(gpu);
etnaviv_gpu_hw_init(gpu);
- gpu->switch_context = true;
+ gpu->lastctx = NULL;
gpu->exec_state = -1;
mutex_unlock(&gpu->lock);
@@ -1738,20 +1666,29 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
int ret;
- if (IS_ENABLED(CONFIG_THERMAL)) {
+ if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) {
gpu->cooling = thermal_of_cooling_device_register(dev->of_node,
(char *)dev_name(dev), gpu, &cooling_ops);
if (IS_ERR(gpu->cooling))
return PTR_ERR(gpu->cooling);
}
+ gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0);
+ if (!gpu->wq) {
+ if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+ thermal_cooling_device_unregister(gpu->cooling);
+ return -ENOMEM;
+ }
+
#ifdef CONFIG_PM
ret = pm_runtime_get_sync(gpu->dev);
#else
ret = etnaviv_gpu_clk_enable(gpu);
#endif
if (ret < 0) {
- thermal_cooling_device_unregister(gpu->cooling);
+ destroy_workqueue(gpu->wq);
+ if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+ thermal_cooling_device_unregister(gpu->cooling);
return ret;
}
@@ -1759,7 +1696,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
gpu->fence_context = dma_fence_context_alloc(1);
spin_lock_init(&gpu->fence_spinlock);
- INIT_LIST_HEAD(&gpu->active_cmd_list);
+ INIT_LIST_HEAD(&gpu->active_submit_list);
INIT_WORK(&gpu->retire_work, retire_worker);
INIT_WORK(&gpu->sync_point_work, sync_point_worker);
INIT_WORK(&gpu->recover_work, recover_worker);
@@ -1784,6 +1721,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
hangcheck_disable(gpu);
+ flush_workqueue(gpu->wq);
+ destroy_workqueue(gpu->wq);
+
#ifdef CONFIG_PM
pm_runtime_get_sync(gpu->dev);
pm_runtime_put_sync_suspend(gpu->dev);
@@ -1791,10 +1731,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
etnaviv_gpu_hw_suspend(gpu);
#endif
- if (gpu->buffer) {
- etnaviv_cmdbuf_free(gpu->buffer);
- gpu->buffer = NULL;
- }
+ if (gpu->buffer.suballoc)
+ etnaviv_cmdbuf_free(&gpu->buffer);
if (gpu->cmdbuf_suballoc) {
etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
@@ -1808,7 +1746,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
gpu->drm = NULL;
- thermal_cooling_device_unregister(gpu->cooling);
+ if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+ thermal_cooling_device_unregister(gpu->cooling);
gpu->cooling = NULL;
}
@@ -1931,7 +1870,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev)
return ret;
/* Re-initialise the basic hardware state */
- if (gpu->drm && gpu->buffer) {
+ if (gpu->drm && gpu->buffer.suballoc) {
ret = etnaviv_gpu_hw_resume(gpu);
if (ret) {
etnaviv_gpu_clk_disable(gpu);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 4f10f147297a..7623905210dc 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -20,6 +20,7 @@
#include <linux/clk.h>
#include <linux/regulator/consumer.h>
+#include "etnaviv_cmdbuf.h"
#include "etnaviv_drv.h"
struct etnaviv_gem_submit;
@@ -89,7 +90,7 @@ struct etnaviv_chip_identity {
struct etnaviv_event {
struct dma_fence *fence;
- struct etnaviv_cmdbuf *cmdbuf;
+ struct etnaviv_gem_submit *submit;
void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event);
};
@@ -106,10 +107,10 @@ struct etnaviv_gpu {
struct mutex lock;
struct etnaviv_chip_identity identity;
struct etnaviv_file_private *lastctx;
- bool switch_context;
+ struct workqueue_struct *wq;
/* 'ring'-buffer: */
- struct etnaviv_cmdbuf *buffer;
+ struct etnaviv_cmdbuf buffer;
int exec_state;
/* bus base address of memory */
@@ -122,7 +123,7 @@ struct etnaviv_gpu {
spinlock_t event_spinlock;
/* list of currently in-flight command buffers */
- struct list_head active_cmd_list;
+ struct list_head active_submit_list;
u32 idle_mask;
@@ -202,7 +203,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
- struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
+ struct etnaviv_gem_submit *submit);
int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 14e24ac6573f..7a8c94731748 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -70,9 +70,8 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
return -ENOMEM;
}
- for (i = 0; i < PT_ENTRIES; i++)
- etnaviv_domain->pgtable_cpu[i] =
- etnaviv_domain->base.bad_page_dma;
+ memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma,
+ PT_ENTRIES);
return 0;
}
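
memset32() from <linux/string.h> fills an array of 32-bit words with a single value; its count is in elements, not bytes, which is what lets it replace the open-coded loop above one-for-one. A small illustration, where MY_PT_ENTRIES is a stand-in for the driver's PT_ENTRIES:

    #include <linux/string.h>
    #include <linux/types.h>

    #define MY_PT_ENTRIES 1024      /* stand-in for the driver's constant */

    static void my_init_pgtable(u32 *pgtable_cpu, u32 bad_page_dma)
    {
            /* third argument is an element count, not a byte count */
            memset32(pgtable_cpu, bad_page_dma, MY_PT_ENTRIES);
    }
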
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index fc60fc8ddbf0..1e956e266aa3 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -229,7 +229,7 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
prefetch = etnaviv_buffer_config_mmuv2(gpu,
(u32)etnaviv_domain->mtlb_dma,
(u32)etnaviv_domain->base.bad_page_dma);
- etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer),
+ etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
prefetch);
etnaviv_gpu_wait_idle(gpu, 100);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 35074b944778..d113fe06e6b5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -263,18 +263,16 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) {
mapping->iova = iova;
list_add_tail(&mapping->mmu_node, &mmu->mappings);
- mutex_unlock(&mmu->lock);
- return 0;
+ ret = 0;
+ goto unlock;
}
}
node = &mapping->vram_node;
ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size);
- if (ret < 0) {
- mutex_unlock(&mmu->lock);
- return ret;
- }
+ if (ret < 0)
+ goto unlock;
mmu->last_iova = node->start + etnaviv_obj->base.size;
mapping->iova = node->start;
@@ -283,12 +281,12 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
if (ret < 0) {
drm_mm_remove_node(node);
- mutex_unlock(&mmu->lock);
- return ret;
+ goto unlock;
}
list_add_tail(&mapping->mmu_node, &mmu->mappings);
mmu->need_flush = true;
+unlock:
mutex_unlock(&mmu->lock);
return ret;
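
The hunk above collapses three separate mutex_unlock()/return pairs into a single unlock label, the usual kernel single-exit locking shape. A minimal sketch of the pattern with hypothetical step helpers:

    #include <linux/mutex.h>

    static int my_step_one(void) { return 0; }      /* hypothetical */
    static int my_step_two(void) { return 0; }      /* hypothetical */

    static int my_locked_op(struct mutex *lock)
    {
            int ret;

            mutex_lock(lock);

            ret = my_step_one();
            if (ret < 0)
                    goto unlock;

            ret = my_step_two();
    unlock:
            mutex_unlock(lock);
            return ret;
    }
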
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
index 768f5aafdd18..26dddfc41aac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@@ -479,9 +479,9 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r,
}
void etnaviv_perfmon_process(struct etnaviv_gpu *gpu,
- const struct etnaviv_perfmon_request *pmr)
+ const struct etnaviv_perfmon_request *pmr, u32 exec_state)
{
- const struct etnaviv_pm_domain_meta *meta = &doms_meta[gpu->exec_state];
+ const struct etnaviv_pm_domain_meta *meta = &doms_meta[exec_state];
const struct etnaviv_pm_domain *dom;
const struct etnaviv_pm_signal *sig;
u32 *bo = pmr->bo_vma;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
index 35dce194cb00..c1653c64ab6b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
@@ -44,6 +44,6 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r,
u32 exec_state);
void etnaviv_perfmon_process(struct etnaviv_gpu *gpu,
- const struct etnaviv_perfmon_request *pmr);
+ const struct etnaviv_perfmon_request *pmr, u32 exec_state);
#endif /* __ETNAVIV_PERFMON_H__ */
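
etnaviv_perfmon_process() now receives the exec_state that was captured when the job was submitted instead of reading gpu->exec_state at processing time, so a later submit cannot change the domain table out from under it. A tiny sketch of this snapshot-then-pass pattern (names hypothetical):

    #include <linux/types.h>

    struct my_job {
            u32 exec_state; /* snapshot taken at submit time */
    };

    static void my_submit(struct my_job *job, u32 live_exec_state)
    {
            job->exec_state = live_exec_state;
    }

    static void my_process(const struct my_job *job)
    {
            /* use the snapshot, never the (possibly changed) live state */
            u32 state = job->exec_state;
            (void)state;
    }
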
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 5a7c9d8abd6b..735ce47688f9 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -95,26 +95,21 @@ config DRM_EXYNOS_G2D
help
Choose this option if you want to use Exynos G2D for DRM.
-config DRM_EXYNOS_IPP
- bool "Image Post Processor"
- help
- Choose this option if you want to use the IPP feature for DRM.
-
config DRM_EXYNOS_FIMC
bool "FIMC"
- depends on DRM_EXYNOS_IPP && MFD_SYSCON
+ depends on BROKEN && MFD_SYSCON
help
Choose this option if you want to use Exynos FIMC for DRM.
config DRM_EXYNOS_ROTATOR
bool "Rotator"
- depends on DRM_EXYNOS_IPP
+ depends on BROKEN
help
Choose this option if you want to use Exynos Rotator for DRM.
config DRM_EXYNOS_GSC
bool "GScaler"
- depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
+ depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
help
Choose this option if you want to use Exynos GSC for DRM.
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index bdf4212dde7b..a51c5459bb13 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -18,7 +18,6 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o
exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o
-exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o
exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o
exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o
exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 6be5b53c3b27..1c330f2a7a5d 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -21,13 +21,12 @@
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
-#include <video/exynos5433_decon.h>
-
#include "exynos_drm_drv.h"
#include "exynos_drm_crtc.h"
#include "exynos_drm_fb.h"
#include "exynos_drm_plane.h"
#include "exynos_drm_iommu.h"
+#include "regs-decon5433.h"
#define DSD_CFG_MUX 0x1004
#define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13)
@@ -744,11 +743,6 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(dev, "cannot find IO resource\n");
- return -ENXIO;
- }
-
ctx->addr = devm_ioremap_resource(dev, res);
if (IS_ERR(ctx->addr)) {
dev_err(dev, "ioremap failed\n");
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 615efcf7782a..3931d5e33fe0 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -25,13 +25,13 @@
#include <video/of_display_timing.h>
#include <video/of_videomode.h>
-#include <video/exynos7_decon.h>
#include "exynos_drm_crtc.h"
#include "exynos_drm_plane.h"
#include "exynos_drm_drv.h"
#include "exynos_drm_fb.h"
#include "exynos_drm_iommu.h"
+#include "regs-decon7.h"
/*
* DECON stands for Display and Enhancement controller.
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index b96bd5a781b2..a518e9c6d6cc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -29,7 +29,6 @@
#include "exynos_drm_plane.h"
#include "exynos_drm_vidi.h"
#include "exynos_drm_g2d.h"
-#include "exynos_drm_ipp.h"
#include "exynos_drm_iommu.h"
#define DRIVER_NAME "exynos"
@@ -109,14 +108,6 @@ static const struct drm_ioctl_desc exynos_ioctls[] = {
DRM_AUTH | DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl,
DRM_AUTH | DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_PROPERTY, exynos_drm_ipp_get_property,
- DRM_AUTH | DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(EXYNOS_IPP_SET_PROPERTY, exynos_drm_ipp_set_property,
- DRM_AUTH | DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(EXYNOS_IPP_QUEUE_BUF, exynos_drm_ipp_queue_buf,
- DRM_AUTH | DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(EXYNOS_IPP_CMD_CTRL, exynos_drm_ipp_cmd_ctrl,
- DRM_AUTH | DRM_RENDER_ALLOW),
};
static const struct file_operations exynos_drm_driver_fops = {
@@ -257,9 +248,6 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
}, {
DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
}, {
- DRV_PTR(ipp_driver, CONFIG_DRM_EXYNOS_IPP),
- DRM_VIRTUAL_DEVICE
- }, {
&exynos_drm_platform_driver,
DRM_VIRTUAL_DEVICE
}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 589d465a7f88..df2262f70d91 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -188,7 +188,6 @@ struct exynos_drm_g2d_private {
struct drm_exynos_file_private {
struct exynos_drm_g2d_private *g2d_priv;
- struct device *ipp_dev;
};
/*
@@ -291,6 +290,5 @@ extern struct platform_driver g2d_driver;
extern struct platform_driver fimc_driver;
extern struct platform_driver rotator_driver;
extern struct platform_driver gsc_driver;
-extern struct platform_driver ipp_driver;
extern struct platform_driver mic_driver;
#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
deleted file mode 100644
index 3edda18cc2d2..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ /dev/null
@@ -1,1806 +0,0 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Authors:
- * Eunchul Kim <[email protected]>
- * Jinyoung Jeon <[email protected]>
- * Sangmin Lee <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- */
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-#include <linux/clk.h>
-#include <linux/pm_runtime.h>
-
-#include <drm/drmP.h>
-#include <drm/exynos_drm.h>
-#include "exynos_drm_drv.h"
-#include "exynos_drm_gem.h"
-#include "exynos_drm_ipp.h"
-#include "exynos_drm_iommu.h"
-
-/*
- * IPP stands for Image Post Processing; it supports image scaler/rotator
- * and input/output DMA operations using FIMC, GSC, Rotator and so on.
- * IPP is an integrated device driver for hardware blocks of this kind.
- */
-
-/*
- * TODO
- * 1. expand command control id.
- * 2. integrate property and config.
- * 3. remove the send_event id check routine.
- * 4. compare send_event id if needed.
- * 5. free the subdrv_remove notifier callback list if needed.
- * 6. check subdrv_open for multi-open.
- * 7. implement power and sysmmu control in power_on.
- */
-
-#define get_ipp_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define ipp_is_m2m_cmd(c) (c == IPP_CMD_M2M)
-
-/*
- * A structure of event.
- *
- * @base: base of event.
- * @event: ipp event.
- */
-struct drm_exynos_ipp_send_event {
- struct drm_pending_event base;
- struct drm_exynos_ipp_event event;
-};
-
-/*
- * A structure of memory node.
- *
- * @list: list head to memory queue information.
- * @ops_id: id of operations.
- * @prop_id: id of property.
- * @buf_id: id of buffer.
- * @buf_info: gem objects and dma address, size.
- * @filp: a pointer to drm_file.
- */
-struct drm_exynos_ipp_mem_node {
- struct list_head list;
- enum drm_exynos_ops_id ops_id;
- u32 prop_id;
- u32 buf_id;
- struct drm_exynos_ipp_buf_info buf_info;
-};
-
-/*
- * A structure of ipp context.
- *
- * @subdrv: prepare initialization using subdrv.
- * @ipp_lock: lock for synchronization of access to ipp_idr.
- * @prop_lock: lock for synchronization of access to prop_idr.
- * @ipp_idr: ipp driver idr.
- * @prop_idr: property idr.
- * @event_workq: event work queue.
- * @cmd_workq: command work queue.
- */
-struct ipp_context {
- struct exynos_drm_subdrv subdrv;
- struct mutex ipp_lock;
- struct mutex prop_lock;
- struct idr ipp_idr;
- struct idr prop_idr;
- struct workqueue_struct *event_workq;
- struct workqueue_struct *cmd_workq;
-};
-
-static LIST_HEAD(exynos_drm_ippdrv_list);
-static DEFINE_MUTEX(exynos_drm_ippdrv_lock);
-static BLOCKING_NOTIFIER_HEAD(exynos_drm_ippnb_list);
-
-int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
-{
- mutex_lock(&exynos_drm_ippdrv_lock);
- list_add_tail(&ippdrv->drv_list, &exynos_drm_ippdrv_list);
- mutex_unlock(&exynos_drm_ippdrv_lock);
-
- return 0;
-}
-
-int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
-{
- mutex_lock(&exynos_drm_ippdrv_lock);
- list_del(&ippdrv->drv_list);
- mutex_unlock(&exynos_drm_ippdrv_lock);
-
- return 0;
-}
-
-static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj)
-{
- int ret;
-
- mutex_lock(lock);
- ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL);
- mutex_unlock(lock);
-
- return ret;
-}
-
-static void ipp_remove_id(struct idr *id_idr, struct mutex *lock, u32 id)
-{
- mutex_lock(lock);
- idr_remove(id_idr, id);
- mutex_unlock(lock);
-}
-
-static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id)
-{
- void *obj;
-
- mutex_lock(lock);
- obj = idr_find(id_idr, id);
- mutex_unlock(lock);
-
- return obj;
-}
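
The three helpers above wrap an idr in a mutex so id allocation, removal and lookup stay consistent. The same pattern in isolation, using the idr_alloc() API and hypothetical names:

    #include <linux/idr.h>
    #include <linux/mutex.h>

    static DEFINE_IDR(my_idr);
    static DEFINE_MUTEX(my_idr_lock);

    static int my_create_id(void *obj)
    {
            int id;

            mutex_lock(&my_idr_lock);
            /* start at 1 so id 0 can mean "none"; end 0 means no limit */
            id = idr_alloc(&my_idr, obj, 1, 0, GFP_KERNEL);
            mutex_unlock(&my_idr_lock);

            return id;      /* new id, or a negative errno on failure */
    }
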
-
-static int ipp_check_driver(struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_property *property)
-{
- if (ippdrv->dedicated || (!ipp_is_m2m_cmd(property->cmd) &&
- !pm_runtime_suspended(ippdrv->dev)))
- return -EBUSY;
-
- if (ippdrv->check_property &&
- ippdrv->check_property(ippdrv->dev, property))
- return -EINVAL;
-
- return 0;
-}
-
-static struct exynos_drm_ippdrv *ipp_find_driver(struct ipp_context *ctx,
- struct drm_exynos_ipp_property *property)
-{
- struct exynos_drm_ippdrv *ippdrv;
- u32 ipp_id = property->ipp_id;
- int ret;
-
- if (ipp_id) {
- ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock, ipp_id);
- if (!ippdrv) {
- DRM_DEBUG("ipp%d driver not found\n", ipp_id);
- return ERR_PTR(-ENODEV);
- }
-
- ret = ipp_check_driver(ippdrv, property);
- if (ret < 0) {
- DRM_DEBUG("ipp%d driver check error %d\n", ipp_id, ret);
- return ERR_PTR(ret);
- }
-
- return ippdrv;
- } else {
- list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
- ret = ipp_check_driver(ippdrv, property);
- if (ret == 0)
- return ippdrv;
- }
-
- DRM_DEBUG("cannot find driver suitable for given property.\n");
- }
-
- return ERR_PTR(-ENODEV);
-}
-
-static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id)
-{
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node;
- int count = 0;
-
- DRM_DEBUG_KMS("prop_id[%d]\n", prop_id);
-
- /*
-  * Search the ipp driver list by prop_id handle. The ipp subsystem
-  * sometimes needs to find a driver by prop_id, e.g. for PAUSE state,
-  * queue buf and command control.
-  */
- list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
- DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv);
-
- mutex_lock(&ippdrv->cmd_lock);
- list_for_each_entry(c_node, &ippdrv->cmd_list, list) {
- if (c_node->property.prop_id == prop_id) {
- mutex_unlock(&ippdrv->cmd_lock);
- return ippdrv;
- }
- }
- mutex_unlock(&ippdrv->cmd_lock);
- }
-
- return ERR_PTR(-ENODEV);
-}
-
-int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
- struct drm_file *file)
-{
- struct drm_exynos_file_private *file_priv = file->driver_priv;
- struct device *dev = file_priv->ipp_dev;
- struct ipp_context *ctx = get_ipp_context(dev);
- struct drm_exynos_ipp_prop_list *prop_list = data;
- struct exynos_drm_ippdrv *ippdrv;
- int count = 0;
-
- if (!ctx) {
- DRM_ERROR("invalid context.\n");
- return -EINVAL;
- }
-
- if (!prop_list) {
- DRM_ERROR("invalid property parameter.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG_KMS("ipp_id[%d]\n", prop_list->ipp_id);
-
- if (!prop_list->ipp_id) {
- list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list)
- count++;
-
- /*
-  * Report the ippdrv count to the user application: first the
-  * application queries the ippdrv count, then it queries each
-  * ippdrv's capability by ipp_id.
-  */
- prop_list->count = count;
- } else {
- /*
-  * Report the ippdrv capability for a given ipp_id. Some devices do
-  * not support the wb or output interface, so the user application
-  * uses this ioctl to detect the correct ipp driver.
-  */
- ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock,
- prop_list->ipp_id);
- if (!ippdrv) {
- DRM_ERROR("not found ipp%d driver.\n",
- prop_list->ipp_id);
- return -ENODEV;
- }
-
- *prop_list = ippdrv->prop_list;
- }
-
- return 0;
-}
-
-static void ipp_print_property(struct drm_exynos_ipp_property *property,
- int idx)
-{
- struct drm_exynos_ipp_config *config = &property->config[idx];
- struct drm_exynos_pos *pos = &config->pos;
- struct drm_exynos_sz *sz = &config->sz;
-
- DRM_DEBUG_KMS("prop_id[%d]ops[%s]fmt[0x%x]\n",
- property->prop_id, idx ? "dst" : "src", config->fmt);
-
- DRM_DEBUG_KMS("pos[%d %d %d %d]sz[%d %d]f[%d]r[%d]\n",
- pos->x, pos->y, pos->w, pos->h,
- sz->hsize, sz->vsize, config->flip, config->degree);
-}
-
-static struct drm_exynos_ipp_cmd_work *ipp_create_cmd_work(void)
-{
- struct drm_exynos_ipp_cmd_work *cmd_work;
-
- cmd_work = kzalloc(sizeof(*cmd_work), GFP_KERNEL);
- if (!cmd_work)
- return ERR_PTR(-ENOMEM);
-
- INIT_WORK((struct work_struct *)cmd_work, ipp_sched_cmd);
-
- return cmd_work;
-}
-
-static struct drm_exynos_ipp_event_work *ipp_create_event_work(void)
-{
- struct drm_exynos_ipp_event_work *event_work;
-
- event_work = kzalloc(sizeof(*event_work), GFP_KERNEL);
- if (!event_work)
- return ERR_PTR(-ENOMEM);
-
- INIT_WORK(&event_work->work, ipp_sched_event);
-
- return event_work;
-}
-
-int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
- struct drm_file *file)
-{
- struct drm_exynos_file_private *file_priv = file->driver_priv;
- struct device *dev = file_priv->ipp_dev;
- struct ipp_context *ctx = get_ipp_context(dev);
- struct drm_exynos_ipp_property *property = data;
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node;
- u32 prop_id;
- int ret, i;
-
- if (!ctx) {
- DRM_ERROR("invalid context.\n");
- return -EINVAL;
- }
-
- if (!property) {
- DRM_ERROR("invalid property parameter.\n");
- return -EINVAL;
- }
-
- prop_id = property->prop_id;
-
- /*
-  * Log the various properties set by the user application.
-  */
- for_each_ipp_ops(i)
- ipp_print_property(property, i);
-
- /*
- * In case prop_id is not zero try to set existing property.
- */
- if (prop_id) {
- c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock, prop_id);
-
- if (!c_node || c_node->filp != file) {
- DRM_DEBUG_KMS("prop_id[%d] not found\n", prop_id);
- return -EINVAL;
- }
-
- if (c_node->state != IPP_STATE_STOP) {
- DRM_DEBUG_KMS("prop_id[%d] not stopped\n", prop_id);
- return -EINVAL;
- }
-
- c_node->property = *property;
-
- return 0;
- }
-
- /* find ipp driver using ipp id */
- ippdrv = ipp_find_driver(ctx, property);
- if (IS_ERR(ippdrv)) {
- DRM_ERROR("failed to get ipp driver.\n");
- return -EINVAL;
- }
-
- /* allocate command node */
- c_node = kzalloc(sizeof(*c_node), GFP_KERNEL);
- if (!c_node)
- return -ENOMEM;
-
- ret = ipp_create_id(&ctx->prop_idr, &ctx->prop_lock, c_node);
- if (ret < 0) {
- DRM_ERROR("failed to create id.\n");
- goto err_clear;
- }
- property->prop_id = ret;
-
- DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n",
- property->prop_id, property->cmd, ippdrv);
-
- /* stored property information and ippdrv in private data */
- c_node->property = *property;
- c_node->state = IPP_STATE_IDLE;
- c_node->filp = file;
-
- c_node->start_work = ipp_create_cmd_work();
- if (IS_ERR(c_node->start_work)) {
- DRM_ERROR("failed to create start work.\n");
- ret = PTR_ERR(c_node->start_work);
- goto err_remove_id;
- }
-
- c_node->stop_work = ipp_create_cmd_work();
- if (IS_ERR(c_node->stop_work)) {
- DRM_ERROR("failed to create stop work.\n");
- ret = PTR_ERR(c_node->stop_work);
- goto err_free_start;
- }
-
- c_node->event_work = ipp_create_event_work();
- if (IS_ERR(c_node->event_work)) {
- DRM_ERROR("failed to create event work.\n");
- ret = PTR_ERR(c_node->event_work);
- goto err_free_stop;
- }
-
- mutex_init(&c_node->lock);
- mutex_init(&c_node->mem_lock);
- mutex_init(&c_node->event_lock);
-
- init_completion(&c_node->start_complete);
- init_completion(&c_node->stop_complete);
-
- for_each_ipp_ops(i)
- INIT_LIST_HEAD(&c_node->mem_list[i]);
-
- INIT_LIST_HEAD(&c_node->event_list);
- mutex_lock(&ippdrv->cmd_lock);
- list_add_tail(&c_node->list, &ippdrv->cmd_list);
- mutex_unlock(&ippdrv->cmd_lock);
-
- /* make dedicated state without m2m */
- if (!ipp_is_m2m_cmd(property->cmd))
- ippdrv->dedicated = true;
-
- return 0;
-
-err_free_stop:
- kfree(c_node->stop_work);
-err_free_start:
- kfree(c_node->start_work);
-err_remove_id:
- ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock, property->prop_id);
-err_clear:
- kfree(c_node);
- return ret;
-}
-
-static int ipp_validate_mem_node(struct drm_device *drm_dev,
- struct drm_exynos_ipp_mem_node *m_node,
- struct drm_exynos_ipp_cmd_node *c_node)
-{
- struct drm_exynos_ipp_config *ipp_cfg;
- unsigned int num_plane;
- unsigned long size, buf_size = 0, plane_size, img_size = 0;
- unsigned int bpp, width, height;
- int i;
-
- ipp_cfg = &c_node->property.config[m_node->ops_id];
- num_plane = drm_format_num_planes(ipp_cfg->fmt);
-
- /*
-  * This is a rather simplified validation of a memory node: it
-  * verifies the provided gem object handles and the buffer sizes
-  * against the current configuration. Not the most thorough check
-  * possible, but it seems more than enough.
-  */
- for (i = 0; i < num_plane; ++i) {
- width = ipp_cfg->sz.hsize;
- height = ipp_cfg->sz.vsize;
- bpp = drm_format_plane_cpp(ipp_cfg->fmt, i);
-
- /*
- * The result of drm_format_plane_cpp() for chroma planes must
- * be used with drm_format_xxxx_chroma_subsampling() for
- * correct result.
- */
- if (i > 0) {
- width /= drm_format_horz_chroma_subsampling(
- ipp_cfg->fmt);
- height /= drm_format_vert_chroma_subsampling(
- ipp_cfg->fmt);
- }
- plane_size = width * height * bpp;
- img_size += plane_size;
-
- if (m_node->buf_info.handles[i]) {
- size = exynos_drm_gem_get_size(drm_dev,
- m_node->buf_info.handles[i],
- c_node->filp);
- if (plane_size > size) {
- DRM_ERROR(
- "buffer %d is smaller than required\n",
- i);
- return -EINVAL;
- }
-
- buf_size += size;
- }
- }
-
- if (buf_size < img_size) {
- DRM_ERROR("size of buffers(%lu) is smaller than image(%lu)\n",
- buf_size, img_size);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ipp_put_mem_node(struct drm_device *drm_dev,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_mem_node *m_node)
-{
- int i;
-
- DRM_DEBUG_KMS("node[%pK]\n", m_node);
-
- if (!m_node) {
- DRM_ERROR("invalid dequeue node.\n");
- return -EFAULT;
- }
-
- DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
-
- /* put gem buffer */
- for_each_ipp_planar(i) {
- unsigned long handle = m_node->buf_info.handles[i];
- if (handle)
- exynos_drm_gem_put_dma_addr(drm_dev, handle,
- c_node->filp);
- }
-
- list_del(&m_node->list);
- kfree(m_node);
-
- return 0;
-}
-
-static struct drm_exynos_ipp_mem_node
- *ipp_get_mem_node(struct drm_device *drm_dev,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct drm_exynos_ipp_mem_node *m_node;
- struct drm_exynos_ipp_buf_info *buf_info;
- int i;
-
- m_node = kzalloc(sizeof(*m_node), GFP_KERNEL);
- if (!m_node)
- return ERR_PTR(-ENOMEM);
-
- buf_info = &m_node->buf_info;
-
- /* operations, buffer id */
- m_node->ops_id = qbuf->ops_id;
- m_node->prop_id = qbuf->prop_id;
- m_node->buf_id = qbuf->buf_id;
- INIT_LIST_HEAD(&m_node->list);
-
- DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id);
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id);
-
- for_each_ipp_planar(i) {
- DRM_DEBUG_KMS("i[%d]handle[0x%x]\n", i, qbuf->handle[i]);
-
- /* get dma address by handle */
- if (qbuf->handle[i]) {
- dma_addr_t *addr;
-
- addr = exynos_drm_gem_get_dma_addr(drm_dev,
- qbuf->handle[i], c_node->filp);
- if (IS_ERR(addr)) {
- DRM_ERROR("failed to get addr.\n");
- ipp_put_mem_node(drm_dev, c_node, m_node);
- return ERR_PTR(-EFAULT);
- }
-
- buf_info->handles[i] = qbuf->handle[i];
- buf_info->base[i] = *addr;
- DRM_DEBUG_KMS("i[%d]base[%pad]hd[0x%lx]\n", i,
- &buf_info->base[i], buf_info->handles[i]);
- }
- }
-
- mutex_lock(&c_node->mem_lock);
- if (ipp_validate_mem_node(drm_dev, m_node, c_node)) {
- ipp_put_mem_node(drm_dev, c_node, m_node);
- mutex_unlock(&c_node->mem_lock);
- return ERR_PTR(-EFAULT);
- }
- list_add_tail(&m_node->list, &c_node->mem_list[qbuf->ops_id]);
- mutex_unlock(&c_node->mem_lock);
-
- return m_node;
-}
-
-static void ipp_clean_mem_nodes(struct drm_device *drm_dev,
- struct drm_exynos_ipp_cmd_node *c_node, int ops)
-{
- struct drm_exynos_ipp_mem_node *m_node, *tm_node;
- struct list_head *head = &c_node->mem_list[ops];
-
- mutex_lock(&c_node->mem_lock);
-
- list_for_each_entry_safe(m_node, tm_node, head, list) {
- int ret;
-
- ret = ipp_put_mem_node(drm_dev, c_node, m_node);
- if (ret)
- DRM_ERROR("failed to put m_node.\n");
- }
-
- mutex_unlock(&c_node->mem_lock);
-}
-
-static int ipp_get_event(struct drm_device *drm_dev,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct drm_exynos_ipp_send_event *e;
- int ret;
-
- DRM_DEBUG_KMS("ops_id[%d]buf_id[%d]\n", qbuf->ops_id, qbuf->buf_id);
-
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e)
- return -ENOMEM;
-
- /* make event */
- e->event.base.type = DRM_EXYNOS_IPP_EVENT;
- e->event.base.length = sizeof(e->event);
- e->event.user_data = qbuf->user_data;
- e->event.prop_id = qbuf->prop_id;
- e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id;
-
- ret = drm_event_reserve_init(drm_dev, c_node->filp, &e->base, &e->event.base);
- if (ret) {
- kfree(e);
- return ret;
- }
-
- mutex_lock(&c_node->event_lock);
- list_add_tail(&e->base.link, &c_node->event_list);
- mutex_unlock(&c_node->event_lock);
-
- return 0;
-}
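
ipp_get_event() above follows the standard two-step DRM pending-event pattern: drm_event_reserve_init() accounts the event against the file at enqueue time, and drm_send_event() later delivers it to readers of the drm fd. A condensed sketch of that pattern stripped of the IPP specifics (struct my_event and the event type value are hypothetical):

    #include <drm/drmP.h>

    struct my_event {
            struct drm_pending_event base;
            struct drm_event payload;
    };

    static int my_queue_event(struct drm_device *dev, struct drm_file *file,
                              struct my_event *e)
    {
            /* driver-specific event types start at 0x80000000 */
            e->payload.type = 0x80000000;
            e->payload.length = sizeof(e->payload);

            /* reserves space in the file's event queue, or fails */
            return drm_event_reserve_init(dev, file, &e->base, &e->payload);
    }

    static void my_complete_event(struct drm_device *dev, struct my_event *e)
    {
            drm_send_event(dev, &e->base);  /* wakes poll()/read() waiters */
    }
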
-
-static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct drm_exynos_ipp_send_event *e, *te;
- int count = 0;
-
- mutex_lock(&c_node->event_lock);
- list_for_each_entry_safe(e, te, &c_node->event_list, base.link) {
- DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e);
-
- /*
-  * qbuf == NULL means delete all events: stop operations want to
-  * clear the whole event list. Otherwise delete only the event
-  * with the matching buf id.
-  */
- if (!qbuf) {
- /* delete list */
- list_del(&e->base.link);
- kfree(e);
- }
-
- /* compare buffer id */
- if (qbuf && (qbuf->buf_id ==
- e->event.buf_id[EXYNOS_DRM_OPS_DST])) {
- /* delete list */
- list_del(&e->base.link);
- kfree(e);
- goto out_unlock;
- }
- }
-
-out_unlock:
- mutex_unlock(&c_node->event_lock);
- return;
-}
-
-static void ipp_clean_cmd_node(struct ipp_context *ctx,
- struct drm_exynos_ipp_cmd_node *c_node)
-{
- int i;
-
- /* cancel works */
- cancel_work_sync(&c_node->start_work->work);
- cancel_work_sync(&c_node->stop_work->work);
- cancel_work_sync(&c_node->event_work->work);
-
- /* put event */
- ipp_put_event(c_node, NULL);
-
- for_each_ipp_ops(i)
- ipp_clean_mem_nodes(ctx->subdrv.drm_dev, c_node, i);
-
- /* delete list */
- list_del(&c_node->list);
-
- ipp_remove_id(&ctx->prop_idr, &ctx->prop_lock,
- c_node->property.prop_id);
-
- /* destroy mutex */
- mutex_destroy(&c_node->lock);
- mutex_destroy(&c_node->mem_lock);
- mutex_destroy(&c_node->event_lock);
-
- /* free command node */
- kfree(c_node->start_work);
- kfree(c_node->stop_work);
- kfree(c_node->event_work);
- kfree(c_node);
-}
-
-static bool ipp_check_mem_list(struct drm_exynos_ipp_cmd_node *c_node)
-{
- switch (c_node->property.cmd) {
- case IPP_CMD_WB:
- return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]);
- case IPP_CMD_OUTPUT:
- return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]);
- case IPP_CMD_M2M:
- default:
- return !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_SRC]) &&
- !list_empty(&c_node->mem_list[EXYNOS_DRM_OPS_DST]);
- }
-}
-
-static struct drm_exynos_ipp_mem_node
- *ipp_find_mem_node(struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct drm_exynos_ipp_mem_node *m_node;
- struct list_head *head;
- int count = 0;
-
- DRM_DEBUG_KMS("buf_id[%d]\n", qbuf->buf_id);
-
- /* source/destination memory list */
- head = &c_node->mem_list[qbuf->ops_id];
-
- /* find memory node from memory list */
- list_for_each_entry(m_node, head, list) {
- DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node);
-
- /* compare buffer id */
- if (m_node->buf_id == qbuf->buf_id)
- return m_node;
- }
-
- return NULL;
-}
-
-static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_mem_node *m_node)
-{
- struct exynos_drm_ipp_ops *ops = NULL;
- int ret = 0;
-
- DRM_DEBUG_KMS("node[%pK]\n", m_node);
-
- if (!m_node) {
- DRM_ERROR("invalid queue node.\n");
- return -EFAULT;
- }
-
- DRM_DEBUG_KMS("ops_id[%d]\n", m_node->ops_id);
-
- /* get operations callback */
- ops = ippdrv->ops[m_node->ops_id];
- if (!ops) {
- DRM_ERROR("not support ops.\n");
- return -EFAULT;
- }
-
- /* set address and enable irq */
- if (ops->set_addr) {
- ret = ops->set_addr(ippdrv->dev, &m_node->buf_info,
- m_node->buf_id, IPP_BUF_ENQUEUE);
- if (ret) {
- DRM_ERROR("failed to set addr.\n");
- return ret;
- }
- }
-
- return ret;
-}
-
-static void ipp_handle_cmd_work(struct device *dev,
- struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_cmd_work *cmd_work,
- struct drm_exynos_ipp_cmd_node *c_node)
-{
- struct ipp_context *ctx = get_ipp_context(dev);
-
- cmd_work->ippdrv = ippdrv;
- cmd_work->c_node = c_node;
- queue_work(ctx->cmd_workq, &cmd_work->work);
-}
-
-static int ipp_queue_buf_with_run(struct device *dev,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_mem_node *m_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_property *property;
- struct exynos_drm_ipp_ops *ops;
- int ret;
-
- ippdrv = ipp_find_drv_by_handle(qbuf->prop_id);
- if (IS_ERR(ippdrv)) {
- DRM_ERROR("failed to get ipp driver.\n");
- return -EFAULT;
- }
-
- ops = ippdrv->ops[qbuf->ops_id];
- if (!ops) {
- DRM_ERROR("failed to get ops.\n");
- return -EFAULT;
- }
-
- property = &c_node->property;
-
- if (c_node->state != IPP_STATE_START) {
- DRM_DEBUG_KMS("bypass for invalid state.\n");
- return 0;
- }
-
- mutex_lock(&c_node->mem_lock);
- if (!ipp_check_mem_list(c_node)) {
- mutex_unlock(&c_node->mem_lock);
- DRM_DEBUG_KMS("empty memory.\n");
- return 0;
- }
-
- /*
-  * If a destination buffer is set and the clock is enabled,
-  * m2m operations need to be started at queue_buf time.
-  */
- if (ipp_is_m2m_cmd(property->cmd)) {
- struct drm_exynos_ipp_cmd_work *cmd_work = c_node->start_work;
-
- cmd_work->ctrl = IPP_CTRL_PLAY;
- ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
- } else {
- ret = ipp_set_mem_node(ippdrv, c_node, m_node);
- if (ret) {
- mutex_unlock(&c_node->mem_lock);
- DRM_ERROR("failed to set m node.\n");
- return ret;
- }
- }
- mutex_unlock(&c_node->mem_lock);
-
- return 0;
-}
-
-static void ipp_clean_queue_buf(struct drm_device *drm_dev,
- struct drm_exynos_ipp_cmd_node *c_node,
- struct drm_exynos_ipp_queue_buf *qbuf)
-{
- struct drm_exynos_ipp_mem_node *m_node, *tm_node;
-
- /* delete list */
- mutex_lock(&c_node->mem_lock);
- list_for_each_entry_safe(m_node, tm_node,
- &c_node->mem_list[qbuf->ops_id], list) {
- if (m_node->buf_id == qbuf->buf_id &&
- m_node->ops_id == qbuf->ops_id)
- ipp_put_mem_node(drm_dev, c_node, m_node);
- }
- mutex_unlock(&c_node->mem_lock);
-}
-
-int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
- struct drm_file *file)
-{
- struct drm_exynos_file_private *file_priv = file->driver_priv;
- struct device *dev = file_priv->ipp_dev;
- struct ipp_context *ctx = get_ipp_context(dev);
- struct drm_exynos_ipp_queue_buf *qbuf = data;
- struct drm_exynos_ipp_cmd_node *c_node;
- struct drm_exynos_ipp_mem_node *m_node;
- int ret;
-
- if (!qbuf) {
- DRM_ERROR("invalid buf parameter.\n");
- return -EINVAL;
- }
-
- if (qbuf->ops_id >= EXYNOS_DRM_OPS_MAX) {
- DRM_ERROR("invalid ops parameter.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG_KMS("prop_id[%d]ops_id[%s]buf_id[%d]buf_type[%d]\n",
- qbuf->prop_id, qbuf->ops_id ? "dst" : "src",
- qbuf->buf_id, qbuf->buf_type);
-
- /* find command node */
- c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
- qbuf->prop_id);
- if (!c_node || c_node->filp != file) {
- DRM_ERROR("failed to get command node.\n");
- return -ENODEV;
- }
-
- /* buffer control */
- switch (qbuf->buf_type) {
- case IPP_BUF_ENQUEUE:
- /* get memory node */
- m_node = ipp_get_mem_node(drm_dev, c_node, qbuf);
- if (IS_ERR(m_node)) {
- DRM_ERROR("failed to get m_node.\n");
- return PTR_ERR(m_node);
- }
-
- /*
-  * First get an event for the destination buffer; then, in the
-  * M2M case, run with the destination buffer if needed.
-  */
- if (qbuf->ops_id == EXYNOS_DRM_OPS_DST) {
- /* get event for destination buffer */
- ret = ipp_get_event(drm_dev, c_node, qbuf);
- if (ret) {
- DRM_ERROR("failed to get event.\n");
- goto err_clean_node;
- }
-
- /*
-  * In the M2M case run play control for the streaming feature;
-  * otherwise just set the address and wait.
-  */
- ret = ipp_queue_buf_with_run(dev, c_node, m_node, qbuf);
- if (ret) {
- DRM_ERROR("failed to run command.\n");
- goto err_clean_node;
- }
- }
- break;
- case IPP_BUF_DEQUEUE:
- mutex_lock(&c_node->lock);
-
- /* put event for destination buffer */
- if (qbuf->ops_id == EXYNOS_DRM_OPS_DST)
- ipp_put_event(c_node, qbuf);
-
- ipp_clean_queue_buf(drm_dev, c_node, qbuf);
-
- mutex_unlock(&c_node->lock);
- break;
- default:
- DRM_ERROR("invalid buffer control.\n");
- return -EINVAL;
- }
-
- return 0;
-
-err_clean_node:
- DRM_ERROR("clean memory nodes.\n");
-
- ipp_clean_queue_buf(drm_dev, c_node, qbuf);
- return ret;
-}
-
-static bool exynos_drm_ipp_check_valid(struct device *dev,
- enum drm_exynos_ipp_ctrl ctrl, enum drm_exynos_ipp_state state)
-{
- if (ctrl != IPP_CTRL_PLAY) {
- if (pm_runtime_suspended(dev)) {
- DRM_ERROR("pm:runtime_suspended.\n");
- goto err_status;
- }
- }
-
- switch (ctrl) {
- case IPP_CTRL_PLAY:
- if (state != IPP_STATE_IDLE)
- goto err_status;
- break;
- case IPP_CTRL_STOP:
- if (state == IPP_STATE_STOP)
- goto err_status;
- break;
- case IPP_CTRL_PAUSE:
- if (state != IPP_STATE_START)
- goto err_status;
- break;
- case IPP_CTRL_RESUME:
- if (state != IPP_STATE_STOP)
- goto err_status;
- break;
- default:
- DRM_ERROR("invalid state.\n");
- goto err_status;
- }
-
- return true;
-
-err_status:
- DRM_ERROR("invalid status:ctrl[%d]state[%d]\n", ctrl, state);
- return false;
-}
-
-int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
- struct drm_file *file)
-{
- struct drm_exynos_file_private *file_priv = file->driver_priv;
- struct exynos_drm_ippdrv *ippdrv = NULL;
- struct device *dev = file_priv->ipp_dev;
- struct ipp_context *ctx = get_ipp_context(dev);
- struct drm_exynos_ipp_cmd_ctrl *cmd_ctrl = data;
- struct drm_exynos_ipp_cmd_work *cmd_work;
- struct drm_exynos_ipp_cmd_node *c_node;
-
- if (!ctx) {
- DRM_ERROR("invalid context.\n");
- return -EINVAL;
- }
-
- if (!cmd_ctrl) {
- DRM_ERROR("invalid control parameter.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG_KMS("ctrl[%d]prop_id[%d]\n",
- cmd_ctrl->ctrl, cmd_ctrl->prop_id);
-
- ippdrv = ipp_find_drv_by_handle(cmd_ctrl->prop_id);
- if (IS_ERR(ippdrv)) {
- DRM_ERROR("failed to get ipp driver.\n");
- return PTR_ERR(ippdrv);
- }
-
- c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
- cmd_ctrl->prop_id);
- if (!c_node || c_node->filp != file) {
- DRM_ERROR("invalid command node list.\n");
- return -ENODEV;
- }
-
- if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl,
- c_node->state)) {
- DRM_ERROR("invalid state.\n");
- return -EINVAL;
- }
-
- switch (cmd_ctrl->ctrl) {
- case IPP_CTRL_PLAY:
- if (pm_runtime_suspended(ippdrv->dev))
- pm_runtime_get_sync(ippdrv->dev);
-
- c_node->state = IPP_STATE_START;
-
- cmd_work = c_node->start_work;
- cmd_work->ctrl = cmd_ctrl->ctrl;
- ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
- break;
- case IPP_CTRL_STOP:
- cmd_work = c_node->stop_work;
- cmd_work->ctrl = cmd_ctrl->ctrl;
- ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-
- if (!wait_for_completion_timeout(&c_node->stop_complete,
- msecs_to_jiffies(300))) {
- DRM_ERROR("timeout stop:prop_id[%d]\n",
- c_node->property.prop_id);
- }
-
- c_node->state = IPP_STATE_STOP;
- ippdrv->dedicated = false;
- mutex_lock(&ippdrv->cmd_lock);
- ipp_clean_cmd_node(ctx, c_node);
-
- if (list_empty(&ippdrv->cmd_list))
- pm_runtime_put_sync(ippdrv->dev);
- mutex_unlock(&ippdrv->cmd_lock);
- break;
- case IPP_CTRL_PAUSE:
- cmd_work = c_node->stop_work;
- cmd_work->ctrl = cmd_ctrl->ctrl;
- ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
-
- if (!wait_for_completion_timeout(&c_node->stop_complete,
- msecs_to_jiffies(200))) {
- DRM_ERROR("timeout stop:prop_id[%d]\n",
- c_node->property.prop_id);
- }
-
- c_node->state = IPP_STATE_STOP;
- break;
- case IPP_CTRL_RESUME:
- c_node->state = IPP_STATE_START;
- cmd_work = c_node->start_work;
- cmd_work->ctrl = cmd_ctrl->ctrl;
- ipp_handle_cmd_work(dev, ippdrv, cmd_work, c_node);
- break;
- default:
- DRM_ERROR("could not support this state currently.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG_KMS("done ctrl[%d]prop_id[%d]\n",
- cmd_ctrl->ctrl, cmd_ctrl->prop_id);
-
- return 0;
-}
-
-int exynos_drm_ippnb_register(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(
- &exynos_drm_ippnb_list, nb);
-}
-
-int exynos_drm_ippnb_unregister(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(
- &exynos_drm_ippnb_list, nb);
-}
-
-int exynos_drm_ippnb_send_event(unsigned long val, void *v)
-{
- return blocking_notifier_call_chain(
- &exynos_drm_ippnb_list, val, v);
-}
-
-static int ipp_set_property(struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_property *property)
-{
- struct exynos_drm_ipp_ops *ops = NULL;
- bool swap = false;
- int ret, i;
-
- if (!property) {
- DRM_ERROR("invalid property parameter.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
- /* reset h/w block */
- if (ippdrv->reset &&
- ippdrv->reset(ippdrv->dev)) {
- return -EINVAL;
- }
-
- /* set source,destination operations */
- for_each_ipp_ops(i) {
- struct drm_exynos_ipp_config *config =
- &property->config[i];
-
- ops = ippdrv->ops[i];
- if (!ops || !config) {
- DRM_ERROR("not support ops and config.\n");
- return -EINVAL;
- }
-
- /* set format */
- if (ops->set_fmt) {
- ret = ops->set_fmt(ippdrv->dev, config->fmt);
- if (ret)
- return ret;
- }
-
- /* set transform for rotation, flip */
- if (ops->set_transf) {
- ret = ops->set_transf(ippdrv->dev, config->degree,
- config->flip, &swap);
- if (ret)
- return ret;
- }
-
- /* set size */
- if (ops->set_size) {
- ret = ops->set_size(ippdrv->dev, swap, &config->pos,
- &config->sz);
- if (ret)
- return ret;
- }
- }
-
- return 0;
-}
-
-static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_cmd_node *c_node)
-{
- struct drm_exynos_ipp_mem_node *m_node;
- struct drm_exynos_ipp_property *property = &c_node->property;
- struct list_head *head;
- int ret, i;
-
- DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
- /* store command info in ippdrv */
- ippdrv->c_node = c_node;
-
- mutex_lock(&c_node->mem_lock);
- if (!ipp_check_mem_list(c_node)) {
- DRM_DEBUG_KMS("empty memory.\n");
- ret = -ENOMEM;
- goto err_unlock;
- }
-
- /* set current property in ippdrv */
- ret = ipp_set_property(ippdrv, property);
- if (ret) {
- DRM_ERROR("failed to set property.\n");
- ippdrv->c_node = NULL;
- goto err_unlock;
- }
-
- /* check command */
- switch (property->cmd) {
- case IPP_CMD_M2M:
- for_each_ipp_ops(i) {
- /* source/destination memory list */
- head = &c_node->mem_list[i];
-
- m_node = list_first_entry(head,
- struct drm_exynos_ipp_mem_node, list);
-
- DRM_DEBUG_KMS("m_node[%pK]\n", m_node);
-
- ret = ipp_set_mem_node(ippdrv, c_node, m_node);
- if (ret) {
- DRM_ERROR("failed to set m node.\n");
- goto err_unlock;
- }
- }
- break;
- case IPP_CMD_WB:
- /* destination memory list */
- head = &c_node->mem_list[EXYNOS_DRM_OPS_DST];
-
- list_for_each_entry(m_node, head, list) {
- ret = ipp_set_mem_node(ippdrv, c_node, m_node);
- if (ret) {
- DRM_ERROR("failed to set m node.\n");
- goto err_unlock;
- }
- }
- break;
- case IPP_CMD_OUTPUT:
- /* source memory list */
- head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
-
- list_for_each_entry(m_node, head, list) {
- ret = ipp_set_mem_node(ippdrv, c_node, m_node);
- if (ret) {
- DRM_ERROR("failed to set m node.\n");
- goto err_unlock;
- }
- }
- break;
- default:
- DRM_ERROR("invalid operations.\n");
- ret = -EINVAL;
- goto err_unlock;
- }
- mutex_unlock(&c_node->mem_lock);
-
- DRM_DEBUG_KMS("cmd[%d]\n", property->cmd);
-
- /* start operations */
- if (ippdrv->start) {
- ret = ippdrv->start(ippdrv->dev, property->cmd);
- if (ret) {
- DRM_ERROR("failed to start ops.\n");
- ippdrv->c_node = NULL;
- return ret;
- }
- }
-
- return 0;
-
-err_unlock:
- mutex_unlock(&c_node->mem_lock);
- ippdrv->c_node = NULL;
- return ret;
-}
-
-static int ipp_stop_property(struct drm_device *drm_dev,
- struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_cmd_node *c_node)
-{
- struct drm_exynos_ipp_property *property = &c_node->property;
- int i;
-
- DRM_DEBUG_KMS("prop_id[%d]\n", property->prop_id);
-
- /* stop operations */
- if (ippdrv->stop)
- ippdrv->stop(ippdrv->dev, property->cmd);
-
- /* check command */
- switch (property->cmd) {
- case IPP_CMD_M2M:
- for_each_ipp_ops(i)
- ipp_clean_mem_nodes(drm_dev, c_node, i);
- break;
- case IPP_CMD_WB:
- ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_DST);
- break;
- case IPP_CMD_OUTPUT:
- ipp_clean_mem_nodes(drm_dev, c_node, EXYNOS_DRM_OPS_SRC);
- break;
- default:
- DRM_ERROR("invalid operations.\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-void ipp_sched_cmd(struct work_struct *work)
-{
- struct drm_exynos_ipp_cmd_work *cmd_work =
- container_of(work, struct drm_exynos_ipp_cmd_work, work);
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node;
- struct drm_exynos_ipp_property *property;
- int ret;
-
- ippdrv = cmd_work->ippdrv;
- if (!ippdrv) {
- DRM_ERROR("invalid ippdrv list.\n");
- return;
- }
-
- c_node = cmd_work->c_node;
- if (!c_node) {
- DRM_ERROR("invalid command node list.\n");
- return;
- }
-
- mutex_lock(&c_node->lock);
-
- property = &c_node->property;
-
- switch (cmd_work->ctrl) {
- case IPP_CTRL_PLAY:
- case IPP_CTRL_RESUME:
- ret = ipp_start_property(ippdrv, c_node);
- if (ret) {
- DRM_ERROR("failed to start property:prop_id[%d]\n",
- c_node->property.prop_id);
- goto err_unlock;
- }
-
- /*
-  * The M2M case waits for completion of the transfer: it performs
-  * a single unit of operation across multiple queues, so it must
-  * wait until the data transfer is done.
-  */
- if (ipp_is_m2m_cmd(property->cmd)) {
- if (!wait_for_completion_timeout
- (&c_node->start_complete, msecs_to_jiffies(200))) {
- DRM_ERROR("timeout event:prop_id[%d]\n",
- c_node->property.prop_id);
- goto err_unlock;
- }
- }
- break;
- case IPP_CTRL_STOP:
- case IPP_CTRL_PAUSE:
- ret = ipp_stop_property(ippdrv->drm_dev, ippdrv,
- c_node);
- if (ret) {
- DRM_ERROR("failed to stop property.\n");
- goto err_unlock;
- }
-
- complete(&c_node->stop_complete);
- break;
- default:
- DRM_ERROR("unknown control type\n");
- break;
- }
-
- DRM_DEBUG_KMS("ctrl[%d] done.\n", cmd_work->ctrl);
-
-err_unlock:
- mutex_unlock(&c_node->lock);
-}
-
-static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
- struct drm_exynos_ipp_cmd_node *c_node, int *buf_id)
-{
- struct drm_device *drm_dev = ippdrv->drm_dev;
- struct drm_exynos_ipp_property *property = &c_node->property;
- struct drm_exynos_ipp_mem_node *m_node;
- struct drm_exynos_ipp_queue_buf qbuf;
- struct drm_exynos_ipp_send_event *e;
- struct list_head *head;
- struct timeval now;
- u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, };
- int ret, i;
-
- for_each_ipp_ops(i)
- DRM_DEBUG_KMS("%s buf_id[%d]\n", i ? "dst" : "src", buf_id[i]);
-
- if (!drm_dev) {
- DRM_ERROR("failed to get drm_dev.\n");
- return -EINVAL;
- }
-
- if (!property) {
- DRM_ERROR("failed to get property.\n");
- return -EINVAL;
- }
-
- mutex_lock(&c_node->event_lock);
- if (list_empty(&c_node->event_list)) {
- DRM_DEBUG_KMS("event list is empty.\n");
- ret = 0;
- goto err_event_unlock;
- }
-
- mutex_lock(&c_node->mem_lock);
- if (!ipp_check_mem_list(c_node)) {
- DRM_DEBUG_KMS("empty memory.\n");
- ret = 0;
- goto err_mem_unlock;
- }
-
- /* check command */
- switch (property->cmd) {
- case IPP_CMD_M2M:
- for_each_ipp_ops(i) {
- /* source/destination memory list */
- head = &c_node->mem_list[i];
-
- m_node = list_first_entry(head,
- struct drm_exynos_ipp_mem_node, list);
-
- tbuf_id[i] = m_node->buf_id;
- DRM_DEBUG_KMS("%s buf_id[%d]\n",
- i ? "dst" : "src", tbuf_id[i]);
-
- ret = ipp_put_mem_node(drm_dev, c_node, m_node);
- if (ret)
- DRM_ERROR("failed to put m_node.\n");
- }
- break;
- case IPP_CMD_WB:
- /* clear buf for finding */
- memset(&qbuf, 0x0, sizeof(qbuf));
- qbuf.ops_id = EXYNOS_DRM_OPS_DST;
- qbuf.buf_id = buf_id[EXYNOS_DRM_OPS_DST];
-
- /* get memory node entry */
- m_node = ipp_find_mem_node(c_node, &qbuf);
- if (!m_node) {
- DRM_ERROR("empty memory node.\n");
- ret = -ENOMEM;
- goto err_mem_unlock;
- }
-
- tbuf_id[EXYNOS_DRM_OPS_DST] = m_node->buf_id;
-
- ret = ipp_put_mem_node(drm_dev, c_node, m_node);
- if (ret)
- DRM_ERROR("failed to put m_node.\n");
- break;
- case IPP_CMD_OUTPUT:
- /* source memory list */
- head = &c_node->mem_list[EXYNOS_DRM_OPS_SRC];
-
- m_node = list_first_entry(head,
- struct drm_exynos_ipp_mem_node, list);
-
- tbuf_id[EXYNOS_DRM_OPS_SRC] = m_node->buf_id;
-
- ret = ipp_put_mem_node(drm_dev, c_node, m_node);
- if (ret)
- DRM_ERROR("failed to put m_node.\n");
- break;
- default:
- DRM_ERROR("invalid operations.\n");
- ret = -EINVAL;
- goto err_mem_unlock;
- }
- mutex_unlock(&c_node->mem_lock);
-
- if (tbuf_id[EXYNOS_DRM_OPS_DST] != buf_id[EXYNOS_DRM_OPS_DST])
- DRM_ERROR("failed to match buf_id[%d %d]prop_id[%d]\n",
- tbuf_id[1], buf_id[1], property->prop_id);
-
- /*
-  * The command node keeps an event list for destination buffers.
-  * When a destination buffer is enqueued to the mem list, an event
-  * is created and linked to the tail of the event list, so the
-  * first event corresponds to the first enqueued buffer.
-  */
- e = list_first_entry(&c_node->event_list,
- struct drm_exynos_ipp_send_event, base.link);
-
- do_gettimeofday(&now);
- DRM_DEBUG_KMS("tv_sec[%ld]tv_usec[%ld]\n", now.tv_sec, now.tv_usec);
- e->event.tv_sec = now.tv_sec;
- e->event.tv_usec = now.tv_usec;
- e->event.prop_id = property->prop_id;
-
- /* set buffer id about source destination */
- for_each_ipp_ops(i)
- e->event.buf_id[i] = tbuf_id[i];
-
- drm_send_event(drm_dev, &e->base);
- mutex_unlock(&c_node->event_lock);
-
- DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n",
- property->cmd, property->prop_id, tbuf_id[EXYNOS_DRM_OPS_DST]);
-
- return 0;
-
-err_mem_unlock:
- mutex_unlock(&c_node->mem_lock);
-err_event_unlock:
- mutex_unlock(&c_node->event_lock);
- return ret;
-}
-
-void ipp_sched_event(struct work_struct *work)
-{
- struct drm_exynos_ipp_event_work *event_work =
- container_of(work, struct drm_exynos_ipp_event_work, work);
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node;
- int ret;
-
- if (!event_work) {
- DRM_ERROR("failed to get event_work.\n");
- return;
- }
-
- DRM_DEBUG_KMS("buf_id[%d]\n", event_work->buf_id[EXYNOS_DRM_OPS_DST]);
-
- ippdrv = event_work->ippdrv;
- if (!ippdrv) {
- DRM_ERROR("failed to get ipp driver.\n");
- return;
- }
-
- c_node = ippdrv->c_node;
- if (!c_node) {
- DRM_ERROR("failed to get command node.\n");
- return;
- }
-
- /*
-  * IPP synchronizes its command and event threads. If userspace
-  * closes IPP immediately, complete the event to synchronize with
-  * the command thread; otherwise bail out of the operation.
-  */
- if (c_node->state != IPP_STATE_START) {
- DRM_DEBUG_KMS("bypass state[%d]prop_id[%d]\n",
- c_node->state, c_node->property.prop_id);
- goto err_completion;
- }
-
- ret = ipp_send_event(ippdrv, c_node, event_work->buf_id);
- if (ret) {
- DRM_ERROR("failed to send event.\n");
- goto err_completion;
- }
-
-err_completion:
- if (ipp_is_m2m_cmd(c_node->property.cmd))
- complete(&c_node->start_complete);
-}
-
-static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
-{
- struct ipp_context *ctx = get_ipp_context(dev);
- struct exynos_drm_ippdrv *ippdrv;
- int ret, count = 0;
-
- /* get ipp driver entry */
- list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
- ippdrv->drm_dev = drm_dev;
-
- ret = ipp_create_id(&ctx->ipp_idr, &ctx->ipp_lock, ippdrv);
- if (ret < 0) {
- DRM_ERROR("failed to create id.\n");
- goto err;
- }
- ippdrv->prop_list.ipp_id = ret;
-
- DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n",
- count++, ippdrv, ret);
-
- /* store parent device for node */
- ippdrv->parent_dev = dev;
-
- /* store event work queue and handler */
- ippdrv->event_workq = ctx->event_workq;
- ippdrv->sched_event = ipp_sched_event;
- INIT_LIST_HEAD(&ippdrv->cmd_list);
- mutex_init(&ippdrv->cmd_lock);
-
- ret = drm_iommu_attach_device(drm_dev, ippdrv->dev);
- if (ret) {
- DRM_ERROR("failed to activate iommu\n");
- goto err;
- }
- }
-
- return 0;
-
-err:
- /* get ipp driver entry */
- list_for_each_entry_continue_reverse(ippdrv, &exynos_drm_ippdrv_list,
- drv_list) {
- drm_iommu_detach_device(drm_dev, ippdrv->dev);
-
- ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
- ippdrv->prop_list.ipp_id);
- }
-
- return ret;
-}
-
-static void ipp_subdrv_remove(struct drm_device *drm_dev, struct device *dev)
-{
- struct exynos_drm_ippdrv *ippdrv, *t;
- struct ipp_context *ctx = get_ipp_context(dev);
-
- /* get ipp driver entry */
- list_for_each_entry_safe(ippdrv, t, &exynos_drm_ippdrv_list, drv_list) {
- drm_iommu_detach_device(drm_dev, ippdrv->dev);
-
- ipp_remove_id(&ctx->ipp_idr, &ctx->ipp_lock,
- ippdrv->prop_list.ipp_id);
-
- ippdrv->drm_dev = NULL;
- exynos_drm_ippdrv_unregister(ippdrv);
- }
-}
-
-static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev,
- struct drm_file *file)
-{
- struct drm_exynos_file_private *file_priv = file->driver_priv;
-
- file_priv->ipp_dev = dev;
-
- DRM_DEBUG_KMS("done priv[%pK]\n", dev);
-
- return 0;
-}
-
-static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev,
- struct drm_file *file)
-{
- struct exynos_drm_ippdrv *ippdrv = NULL;
- struct ipp_context *ctx = get_ipp_context(dev);
- struct drm_exynos_ipp_cmd_node *c_node, *tc_node;
- int count = 0;
-
- list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) {
- mutex_lock(&ippdrv->cmd_lock);
- list_for_each_entry_safe(c_node, tc_node,
- &ippdrv->cmd_list, list) {
- DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n",
- count++, ippdrv);
-
- if (c_node->filp == file) {
- /*
-  * Userland went into an abnormal state: the process was killed
-  * and the file closed without calling the stop cmd ctrl, so
-  * perform the stop operation here.
-  */
- if (c_node->state == IPP_STATE_START) {
- ipp_stop_property(drm_dev, ippdrv,
- c_node);
- c_node->state = IPP_STATE_STOP;
- }
-
- ippdrv->dedicated = false;
- ipp_clean_cmd_node(ctx, c_node);
- if (list_empty(&ippdrv->cmd_list))
- pm_runtime_put_sync(ippdrv->dev);
- }
- }
- mutex_unlock(&ippdrv->cmd_lock);
- }
-
- return;
-}
-
-static int ipp_probe(struct platform_device *pdev)
-{
- struct device *dev = &pdev->dev;
- struct ipp_context *ctx;
- struct exynos_drm_subdrv *subdrv;
- int ret;
-
- ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- mutex_init(&ctx->ipp_lock);
- mutex_init(&ctx->prop_lock);
-
- idr_init(&ctx->ipp_idr);
- idr_init(&ctx->prop_idr);
-
- /*
-  * Create a single-threaded workqueue for ipp events.
-  * IPP drivers send event_work to this thread, and the event
-  * thread then sends events to the user process.
-  */
- ctx->event_workq = create_singlethread_workqueue("ipp_event");
- if (!ctx->event_workq) {
- dev_err(dev, "failed to create event workqueue\n");
- return -EINVAL;
- }
-
- /*
-  * Create a single-threaded workqueue for ipp commands.
-  * A user process creates a command node via the set property
-  * ioctl, builds start_work and sends it to the command thread,
-  * which then starts the property.
-  */
- ctx->cmd_workq = create_singlethread_workqueue("ipp_cmd");
- if (!ctx->cmd_workq) {
- dev_err(dev, "failed to create cmd workqueue\n");
- ret = -EINVAL;
- goto err_event_workq;
- }
-
- /* set sub driver information */
- subdrv = &ctx->subdrv;
- subdrv->dev = dev;
- subdrv->probe = ipp_subdrv_probe;
- subdrv->remove = ipp_subdrv_remove;
- subdrv->open = ipp_subdrv_open;
- subdrv->close = ipp_subdrv_close;
-
- platform_set_drvdata(pdev, ctx);
-
- ret = exynos_drm_subdrv_register(subdrv);
- if (ret < 0) {
- DRM_ERROR("failed to register drm ipp device.\n");
- goto err_cmd_workq;
- }
-
- dev_info(dev, "drm ipp registered successfully.\n");
-
- return 0;
-
-err_cmd_workq:
- destroy_workqueue(ctx->cmd_workq);
-err_event_workq:
- destroy_workqueue(ctx->event_workq);
- return ret;
-}
-
-static int ipp_remove(struct platform_device *pdev)
-{
- struct ipp_context *ctx = platform_get_drvdata(pdev);
-
- /* unregister sub driver */
- exynos_drm_subdrv_unregister(&ctx->subdrv);
-
- /* remove,destroy ipp idr */
- idr_destroy(&ctx->ipp_idr);
- idr_destroy(&ctx->prop_idr);
-
- mutex_destroy(&ctx->ipp_lock);
- mutex_destroy(&ctx->prop_lock);
-
- /* destroy command, event work queue */
- destroy_workqueue(ctx->cmd_workq);
- destroy_workqueue(ctx->event_workq);
-
- return 0;
-}
-
-struct platform_driver ipp_driver = {
- .probe = ipp_probe,
- .remove = ipp_remove,
- .driver = {
- .name = "exynos-drm-ipp",
- .owner = THIS_MODULE,
- },
-};
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
deleted file mode 100644
index 2a61547a39d0..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * Eunchul Kim <[email protected]>
- * Jinyoung Jeon <[email protected]>
- * Sangmin Lee <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_IPP_H_
-#define _EXYNOS_DRM_IPP_H_
-
-#define for_each_ipp_ops(pos) \
- for (pos = 0; pos < EXYNOS_DRM_OPS_MAX; pos++)
-#define for_each_ipp_planar(pos) \
- for (pos = 0; pos < EXYNOS_DRM_PLANAR_MAX; pos++)
-
-#define IPP_GET_LCD_WIDTH _IOR('F', 302, int)
-#define IPP_GET_LCD_HEIGHT _IOR('F', 303, int)
-#define IPP_SET_WRITEBACK _IOW('F', 304, u32)
-
-/* definition of state */
-enum drm_exynos_ipp_state {
- IPP_STATE_IDLE,
- IPP_STATE_START,
- IPP_STATE_STOP,
-};
-
-/*
- * A structure of command work information.
- * @work: work structure.
- * @ippdrv: current work ippdrv.
- * @c_node: command node information.
- * @ctrl: command control.
- */
-struct drm_exynos_ipp_cmd_work {
- struct work_struct work;
- struct exynos_drm_ippdrv *ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node;
- enum drm_exynos_ipp_ctrl ctrl;
-};
-
-/*
- * A structure of command node.
- *
- * @list: list head to command queue information.
- * @event_list: list head of event.
- * @mem_list: list head of source/destination memory queue information.
- * @lock: lock for synchronization of access to ioctl.
- * @mem_lock: lock for synchronization of access to memory nodes.
- * @event_lock: lock for synchronization of access to scheduled event.
- * @start_complete: completion of start of command.
- * @stop_complete: completion of stop of command.
- * @property: property information.
- * @start_work: start command work structure.
- * @stop_work: stop command work structure.
- * @event_work: event work structure.
- * @state: state of command node.
- * @filp: associated file pointer.
- */
-struct drm_exynos_ipp_cmd_node {
- struct list_head list;
- struct list_head event_list;
- struct list_head mem_list[EXYNOS_DRM_OPS_MAX];
- struct mutex lock;
- struct mutex mem_lock;
- struct mutex event_lock;
- struct completion start_complete;
- struct completion stop_complete;
- struct drm_exynos_ipp_property property;
- struct drm_exynos_ipp_cmd_work *start_work;
- struct drm_exynos_ipp_cmd_work *stop_work;
- struct drm_exynos_ipp_event_work *event_work;
- enum drm_exynos_ipp_state state;
- struct drm_file *filp;
-};
-
-/*
- * A structure of buffer information.
- *
- * @handles: GEM object handle for each of the Y, Cb, Cr planes.
- * @base: DMA address of each of the Y, Cb, Cr planes.
- */
-struct drm_exynos_ipp_buf_info {
- unsigned long handles[EXYNOS_DRM_PLANAR_MAX];
- dma_addr_t base[EXYNOS_DRM_PLANAR_MAX];
-};
-
-/*
- * A structure of wb setting information.
- *
- * @enable: enable flag for wb.
- * @refresh: refresh rate in Hz.
- */
-struct drm_exynos_ipp_set_wb {
- __u32 enable;
- __u32 refresh;
-};
-
-/*
- * A structure of event work information.
- *
- * @work: work structure.
- * @ippdrv: current work ippdrv.
- * @buf_id: id of src, dst buffer.
- */
-struct drm_exynos_ipp_event_work {
- struct work_struct work;
- struct exynos_drm_ippdrv *ippdrv;
- u32 buf_id[EXYNOS_DRM_OPS_MAX];
-};
-
-/*
- * A structure of source/destination operations.
- *
- * @set_fmt: set format of image.
- * @set_transf: set transform (rotation, flip).
- * @set_size: set size of region.
- * @set_addr: set address for dma.
- */
-struct exynos_drm_ipp_ops {
- int (*set_fmt)(struct device *dev, u32 fmt);
- int (*set_transf)(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap);
- int (*set_size)(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz);
- int (*set_addr)(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type);
-};
-
-/*
- * A structure of ipp driver.
- *
- * @drv_list: list head for registered sub driver information.
- * @parent_dev: parent device information.
- * @dev: platform device.
- * @drm_dev: drm device.
- * @dedicated: dedicated ipp device.
- * @ops: source, destination operations.
- * @event_workq: event work queue.
- * @c_node: current command information.
- * @cmd_list: list head for command information.
- * @cmd_lock: lock for synchronization of access to cmd_list.
- * @prop_list: property information of the current ipp driver.
- * @check_property: check property about format, size, buffer.
- * @reset: reset ipp block.
- * @start: start the ipp device.
- * @stop: stop the ipp device.
- * @sched_event: work schedule handler.
- */
-struct exynos_drm_ippdrv {
- struct list_head drv_list;
- struct device *parent_dev;
- struct device *dev;
- struct drm_device *drm_dev;
- bool dedicated;
- struct exynos_drm_ipp_ops *ops[EXYNOS_DRM_OPS_MAX];
- struct workqueue_struct *event_workq;
- struct drm_exynos_ipp_cmd_node *c_node;
- struct list_head cmd_list;
- struct mutex cmd_lock;
- struct drm_exynos_ipp_prop_list prop_list;
-
- int (*check_property)(struct device *dev,
- struct drm_exynos_ipp_property *property);
- int (*reset)(struct device *dev);
- int (*start)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
- void (*stop)(struct device *dev, enum drm_exynos_ipp_cmd cmd);
- void (*sched_event)(struct work_struct *work);
-};
-
-#ifdef CONFIG_DRM_EXYNOS_IPP
-extern int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv);
-extern int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv);
-extern int exynos_drm_ipp_get_property(struct drm_device *drm_dev, void *data,
- struct drm_file *file);
-extern int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data,
- struct drm_file *file);
-extern int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev, void *data,
- struct drm_file *file);
-extern int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev, void *data,
- struct drm_file *file);
-extern int exynos_drm_ippnb_register(struct notifier_block *nb);
-extern int exynos_drm_ippnb_unregister(struct notifier_block *nb);
-extern int exynos_drm_ippnb_send_event(unsigned long val, void *v);
-extern void ipp_sched_cmd(struct work_struct *work);
-extern void ipp_sched_event(struct work_struct *work);
-
-#else
-static inline int exynos_drm_ippdrv_register(struct exynos_drm_ippdrv *ippdrv)
-{
- return -ENODEV;
-}
-
-static inline int exynos_drm_ippdrv_unregister(struct exynos_drm_ippdrv *ippdrv)
-{
- return -ENODEV;
-}
-
-static inline int exynos_drm_ipp_get_property(struct drm_device *drm_dev,
- void *data,
- struct drm_file *file_priv)
-{
- return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_set_property(struct drm_device *drm_dev,
- void *data,
- struct drm_file *file_priv)
-{
- return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_queue_buf(struct drm_device *drm_dev,
- void *data,
- struct drm_file *file)
-{
- return -ENOTTY;
-}
-
-static inline int exynos_drm_ipp_cmd_ctrl(struct drm_device *drm_dev,
- void *data,
- struct drm_file *file)
-{
- return -ENOTTY;
-}
-
-static inline int exynos_drm_ippnb_register(struct notifier_block *nb)
-{
- return -ENODEV;
-}
-
-static inline int exynos_drm_ippnb_unregister(struct notifier_block *nb)
-{
- return -ENODEV;
-}
-
-static inline int exynos_drm_ippnb_send_event(unsigned long val, void *v)
-{
- return -ENOTTY;
-}
-#endif
-
-#endif /* _EXYNOS_DRM_IPP_H_ */
-
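The #ifdef CONFIG_DRM_EXYNOS_IPP block above is the standard kernel pattern for optional subsystems: when the feature is compiled out, static inline stubs keep callers building while returning -ENODEV (facility absent) or -ENOTTY (ioctl not implemented). A minimal sketch of the pattern, with a hypothetical CONFIG_FOO and foo_register() standing in:

    struct foo;                          /* hypothetical subsystem handle */

    #ifdef CONFIG_FOO
    int foo_register(struct foo *f);     /* real implementation elsewhere */
    #else
    static inline int foo_register(struct foo *f)
    {
        return -ENODEV;                  /* feature compiled out */
    }
    #endif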
diff --git a/include/video/exynos5433_decon.h b/drivers/gpu/drm/exynos/regs-decon5433.h
index 78957c9626f5..19ad9e47945e 100644
--- a/include/video/exynos5433_decon.h
+++ b/drivers/gpu/drm/exynos/regs-decon5433.h
@@ -6,8 +6,8 @@
* published by the Free Software Foundation.
*/
-#ifndef EXYNOS_REGS_DECON_H
-#define EXYNOS_REGS_DECON_H
+#ifndef EXYNOS_REGS_DECON5433_H
+#define EXYNOS_REGS_DECON5433_H
/* Exynos543X DECON */
#define DECON_VIDCON0 0x0000
@@ -206,4 +206,4 @@
#define CRCCTRL_CRCEN (0x1 << 0)
#define CRCCTRL_MASK (0x7)
-#endif /* EXYNOS_REGS_DECON_H */
+#endif /* EXYNOS_REGS_DECON5433_H */
diff --git a/include/video/exynos7_decon.h b/drivers/gpu/drm/exynos/regs-decon7.h
index a62b11b613f6..5df7765d2397 100644
--- a/include/video/exynos7_decon.h
+++ b/drivers/gpu/drm/exynos/regs-decon7.h
@@ -1,5 +1,4 @@
-/* include/video/exynos7_decon.h
- *
+/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author: Ajay Kumar <[email protected]>
*
@@ -9,6 +8,9 @@
* option) any later version.
*/
+#ifndef EXYNOS_REGS_DECON7_H
+#define EXYNOS_REGS_DECON7_H
+
/* VIDCON0 */
#define VIDCON0 0x00
@@ -347,3 +349,5 @@
#define DECON_UPDATE_SLAVE_SYNC (1 << 4)
#define DECON_UPDATE_STANDALONE_F (1 << 0)
+
+#endif /* EXYNOS_REGS_DECON7_H */
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index fa36491495b1..108d21f34777 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -29,7 +29,6 @@ config DRM_I915_DEBUG
select SW_SYNC # signaling validation framework (igt/syncobj*)
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
- select DRM_I915_TRACE_GEM
default n
help
Choose this option to turn on extra driver debugging that may affect
@@ -53,6 +52,7 @@ config DRM_I915_DEBUG_GEM
config DRM_I915_TRACE_GEM
bool "Insert extra ftrace output from the GEM internals"
+ depends on DRM_I915_DEBUG_GEM
select TRACING
default n
help
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 18c45734c7a2..edec15d19538 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -825,6 +825,21 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s,
return 0;
}
+static inline bool is_mocs_mmio(unsigned int offset)
+{
+ return ((offset >= 0xc800) && (offset <= 0xcff8)) ||
+ ((offset >= 0xb020) && (offset <= 0xb0a0));
+}
+
+static int mocs_cmd_reg_handler(struct parser_exec_state *s,
+ unsigned int offset, unsigned int index)
+{
+ if (!is_mocs_mmio(offset))
+ return -EINVAL;
+ vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1);
+ return 0;
+}
+
static int cmd_reg_handler(struct parser_exec_state *s,
unsigned int offset, unsigned int index, char *cmd)
{
@@ -848,6 +863,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
return 0;
}
+ if (is_mocs_mmio(offset) &&
+ mocs_cmd_reg_handler(s, offset, index))
+ return -EINVAL;
+
if (is_force_nonpriv_mmio(offset) &&
force_nonpriv_reg_handler(s, offset, index))
return -EPERM;
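A standalone check of the inclusive ranges the new is_mocs_mmio() helper tests (the range constants are copied from the hunk above; this userspace harness is illustrative and not part of the patch):

    #include <assert.h>
    #include <stdbool.h>

    static bool is_mocs_mmio(unsigned int offset)
    {
        return (offset >= 0xc800 && offset <= 0xcff8) ||
               (offset >= 0xb020 && offset <= 0xb0a0);
    }

    int main(void)
    {
        assert(is_mocs_mmio(0xc800) && is_mocs_mmio(0xcff8));   /* bounds in */
        assert(!is_mocs_mmio(0xc7fc) && !is_mocs_mmio(0xcffc)); /* neighbours out */
        assert(is_mocs_mmio(0xb020) && !is_mocs_mmio(0xb0a4));
        return 0;
    }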
@@ -1220,13 +1239,13 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s,
return 0;
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
- stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0);
- tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) &
+ stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0);
+ tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) &
GENMASK(12, 10)) >> 10;
} else {
- stride = (vgpu_vreg(s->vgpu, info->stride_reg) &
+ stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) &
GENMASK(15, 6)) >> 6;
- tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10;
+ tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10;
}
if (stride != info->stride_val)
@@ -1245,21 +1264,21 @@ static int gen8_update_plane_mmio_from_mi_display_flip(
struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
struct intel_vgpu *vgpu = s->vgpu;
- set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12),
info->surf_val << 12);
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
- set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0),
info->stride_val);
- set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10),
info->tile_val << 10);
} else {
- set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6),
info->stride_val << 6);
- set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10),
+ set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10),
info->tile_val << 10);
}
- vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++;
+ vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++;
intel_vgpu_trigger_virtual_event(vgpu, info->event);
return 0;
}
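For readers unfamiliar with set_mask_bits() as used in this hunk: the kernel macro replaces the masked field of a word with pre-shifted bits inside a cmpxchg loop. Ignoring the atomicity, its effect is:

    /* Non-atomic equivalent of set_mask_bits(ptr, mask, bits). */
    static inline void set_mask_bits_sketch(u32 *ptr, u32 mask, u32 bits)
    {
        /* caller pre-shifts bits into the field, cf. surf_val << 12 above */
        *ptr = (*ptr & ~mask) | bits;
    }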
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 09185036bac8..dd96ffc878ac 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -59,7 +59,7 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- if (!(vgpu_vreg(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE))
+ if (!(vgpu_vreg_t(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE))
return 0;
if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE))
@@ -74,7 +74,7 @@ int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe)
if (WARN_ON(pipe < PIPE_A || pipe >= I915_MAX_PIPES))
return -EINVAL;
- if (vgpu_vreg(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE)
+ if (vgpu_vreg_t(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE)
return 1;
if (edp_pipe_is_enabled(vgpu) &&
@@ -169,103 +169,105 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT |
+ vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT |
SDE_PORTC_HOTPLUG_CPT |
SDE_PORTD_HOTPLUG_CPT);
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
- vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
+ vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
SDE_PORTE_HOTPLUG_SPT);
- vgpu_vreg(vgpu, SKL_FUSE_STATUS) |=
+ vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |=
SKL_FUSE_DOWNLOAD_STATUS |
SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
- vgpu_vreg(vgpu, LCPLL1_CTL) |=
+ vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
LCPLL_PLL_ENABLE |
LCPLL_PLL_LOCK;
- vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
+ vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
- vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+ vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_B << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
if (IS_BROADWELL(dev_priv)) {
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) &=
~PORT_CLK_SEL_MASK;
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) |=
PORT_CLK_SEL_LCPLL_810;
}
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
- vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
+ vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
- vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+ vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_C << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
if (IS_BROADWELL(dev_priv)) {
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) &=
~PORT_CLK_SEL_MASK;
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) |=
PORT_CLK_SEL_LCPLL_810;
}
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
- vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
+ vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
- vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+ vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
TRANS_DDI_PORT_MASK);
- vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+ vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
(PORT_D << TRANS_DDI_PORT_SHIFT) |
TRANS_DDI_FUNC_ENABLE);
if (IS_BROADWELL(dev_priv)) {
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) &=
~PORT_CLK_SEL_MASK;
- vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |=
+ vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) |=
PORT_CLK_SEL_LCPLL_810;
}
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
- vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
+ vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
}
if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
- vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
+ vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
if (IS_BROADWELL(dev_priv))
- vgpu_vreg(vgpu, GEN8_DE_PORT_ISR) |=
+ vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
GEN8_PORT_DP_A_HOTPLUG;
else
- vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT;
+ vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT;
- vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
+ vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
}
/* Clear host CRT status, so the guest cannot detect the host CRT. */
if (IS_BROADWELL(dev_priv))
- vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
+ vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
+
+ vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
}
static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
@@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
int type, unsigned int resolution)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
if (WARN_ON(resolution >= GVT_EDID_NUM))
@@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
port->type = type;
emulate_monitor_status_change(vgpu);
- vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
+
return 0;
}
@@ -368,12 +369,12 @@ static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe)
if (!pipe_is_enabled(vgpu, pipe))
continue;
- vgpu_vreg(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++;
+ vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++;
intel_vgpu_trigger_virtual_event(vgpu, event);
}
if (pipe_is_enabled(vgpu, pipe)) {
- vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(pipe))++;
+ vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(pipe))++;
intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]);
}
}
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
index 42cd09ec63fa..f61337632969 100644
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -95,9 +95,9 @@ static inline int get_port_from_gmbus0(u32 gmbus0)
static void reset_gmbus_controller(struct intel_vgpu *vgpu)
{
- vgpu_vreg(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY;
if (!vgpu->display.i2c_edid.edid_available)
- vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE;
}
@@ -123,16 +123,16 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu,
vgpu->display.i2c_edid.state = I2C_GMBUS;
vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE;
- vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
- vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE;
if (intel_vgpu_has_monitor_on_port(vgpu, port) &&
!intel_vgpu_port_is_dp(vgpu, port)) {
vgpu->display.i2c_edid.port = port;
vgpu->display.i2c_edid.edid_available = true;
- vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER;
} else
- vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
return 0;
}
@@ -159,8 +159,8 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
* 2) HW_RDY bit asserted
*/
if (wvalue & GMBUS_SW_CLR_INT) {
- vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_INT;
- vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_INT;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY;
}
/* For virtualization, we suppose that HW is always ready,
@@ -208,7 +208,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
* visible in gmbus interface)
*/
i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE;
- vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
}
break;
case NIDX_NS_W:
@@ -220,7 +220,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
* START (-->INDEX) -->DATA
*/
i2c_edid->gmbus.phase = GMBUS_DATA_PHASE;
- vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE;
+ vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE;
break;
default:
gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n");
@@ -256,7 +256,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
u32 reg_data = 0;
/* Data can only be received if the previous settings are correct */
- if (vgpu_vreg(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) {
+ if (vgpu_vreg_t(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) {
if (byte_left <= 0) {
memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
return 0;
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
index 6cc99543693f..6b50fe78dc1b 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -147,7 +147,7 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe,
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- u32 stride_reg = vgpu_vreg(vgpu, DSPSTRIDE(pipe)) & stride_mask;
+ u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask;
u32 stride = stride_reg;
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
@@ -209,7 +209,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
if (pipe >= I915_MAX_PIPES)
return -ENODEV;
- val = vgpu_vreg(vgpu, DSPCNTR(pipe));
+ val = vgpu_vreg_t(vgpu, DSPCNTR(pipe));
plane->enabled = !!(val & DISPLAY_PLANE_ENABLE);
if (!plane->enabled)
return -ENODEV;
@@ -244,7 +244,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
plane->hw_format = fmt;
- plane->base = vgpu_vreg(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK;
+ plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK;
if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
gvt_vgpu_err("invalid gma address: %lx\n",
(unsigned long)plane->base);
@@ -263,14 +263,14 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
(_PRI_PLANE_STRIDE_MASK >> 6) :
_PRI_PLANE_STRIDE_MASK, plane->bpp);
- plane->width = (vgpu_vreg(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >>
+ plane->width = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >>
_PIPE_H_SRCSZ_SHIFT;
plane->width += 1;
- plane->height = (vgpu_vreg(vgpu, PIPESRC(pipe)) &
+ plane->height = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) &
_PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT;
plane->height += 1; /* raw height is one less than the real value */
- val = vgpu_vreg(vgpu, DSPTILEOFF(pipe));
+ val = vgpu_vreg_t(vgpu, DSPTILEOFF(pipe));
plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >>
_PRI_PLANE_X_OFF_SHIFT;
plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >>
@@ -344,7 +344,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
if (pipe >= I915_MAX_PIPES)
return -ENODEV;
- val = vgpu_vreg(vgpu, CURCNTR(pipe));
+ val = vgpu_vreg_t(vgpu, CURCNTR(pipe));
mode = val & CURSOR_MODE;
plane->enabled = (mode != CURSOR_MODE_DISABLE);
if (!plane->enabled)
@@ -370,7 +370,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n",
alpha_plane, alpha_force);
- plane->base = vgpu_vreg(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK;
+ plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK;
if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
gvt_vgpu_err("invalid gma address: %lx\n",
(unsigned long)plane->base);
@@ -384,7 +384,7 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
return -EINVAL;
}
- val = vgpu_vreg(vgpu, CURPOS(pipe));
+ val = vgpu_vreg_t(vgpu, CURPOS(pipe));
plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT;
plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT;
plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT;
@@ -424,7 +424,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
if (pipe >= I915_MAX_PIPES)
return -ENODEV;
- val = vgpu_vreg(vgpu, SPRCTL(pipe));
+ val = vgpu_vreg_t(vgpu, SPRCTL(pipe));
plane->enabled = !!(val & SPRITE_ENABLE);
if (!plane->enabled)
return -ENODEV;
@@ -475,7 +475,7 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
plane->drm_format = drm_format;
- plane->base = vgpu_vreg(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK;
+ plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK;
if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) {
gvt_vgpu_err("invalid gma address: %lx\n",
(unsigned long)plane->base);
@@ -489,10 +489,10 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
return -EINVAL;
}
- plane->stride = vgpu_vreg(vgpu, SPRSTRIDE(pipe)) &
+ plane->stride = vgpu_vreg_t(vgpu, SPRSTRIDE(pipe)) &
_SPRITE_STRIDE_MASK;
- val = vgpu_vreg(vgpu, SPRSIZE(pipe));
+ val = vgpu_vreg_t(vgpu, SPRSIZE(pipe));
plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >>
_SPRITE_SIZE_HEIGHT_SHIFT;
plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >>
@@ -500,11 +500,11 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
plane->height += 1; /* raw height is one less than the real value */
plane->width += 1; /* raw width is one less than the real value */
- val = vgpu_vreg(vgpu, SPRPOS(pipe));
+ val = vgpu_vreg_t(vgpu, SPRPOS(pipe));
plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT;
plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT;
- val = vgpu_vreg(vgpu, SPROFFSET(pipe));
+ val = vgpu_vreg_t(vgpu, SPROFFSET(pipe));
plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >>
_SPRITE_OFFSET_START_X_SHIFT;
plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >>
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 71a0f2b87b3a..c4f752eeadcc 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1968,6 +1968,39 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
return ret;
}
+int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa,
+ void *p_data, unsigned int bytes)
+{
+ struct intel_gvt *gvt = vgpu->gvt;
+ int ret = 0;
+
+ if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
+ struct intel_vgpu_page_track *t;
+
+ mutex_lock(&gvt->lock);
+
+ t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
+ if (t) {
+ if (unlikely(vgpu->failsafe)) {
+ /* remove write protection to prevent future traps */
+ intel_vgpu_clean_page_track(vgpu, t);
+ } else {
+ ret = t->handler(t, pa, p_data, bytes);
+ if (ret) {
+ gvt_err("guest page write error %d, "
+ "gfn 0x%lx, pa 0x%llx, "
+ "var 0x%x, len %d\n",
+ ret, t->gfn, pa,
+ *(u32 *)p_data, bytes);
+ }
+ }
+ }
+ mutex_unlock(&gvt->lock);
+ }
+ return ret;
+}
+
+
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
intel_gvt_gtt_type_t type)
{
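This entry point is exported through intel_gvt_ops (see the gvt.c/gvt.h hunks below), so a hypervisor backend can route trapped guest writes to it without reaching into GTT internals. A hedged sketch of such a caller follows; the wrapper name is hypothetical, while the handler signature matches the prototype added in this patch:

    static int mpt_forward_trapped_write(const struct intel_gvt_ops *ops,
                                         struct intel_vgpu *vgpu, u64 gpa,
                                         void *data, unsigned int bytes)
    {
        /* 0 if the page is untracked or its handler succeeds, else -errno. */
        return ops->write_protect_handler(vgpu, gpa, data, bytes);
    }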
@@ -2244,7 +2277,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
int page_table_level)
{
- u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
+ u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
struct intel_vgpu_mm *mm;
if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
@@ -2279,7 +2312,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
int page_table_level)
{
- u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
+ u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
struct intel_vgpu_mm *mm;
if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index f98c1c19b4cb..4cc13b5934f1 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -308,4 +308,7 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu,
unsigned int off, void *p_data, unsigned int bytes);
+int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa,
+ void *p_data, unsigned int bytes);
+
#endif /* _GVT_GTT_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 643bb961d40d..fac54f32d33f 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -183,6 +183,7 @@ static const struct intel_gvt_ops intel_gvt_ops = {
.get_gvt_attrs = intel_get_gvt_attrs,
.vgpu_query_plane = intel_vgpu_query_plane,
.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
+ .write_protect_handler = intel_vgpu_write_protect_handler,
};
/**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 1e9f11c8b7bb..7dc7a80213a8 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -412,23 +412,20 @@ void intel_vgpu_free_resource(struct intel_vgpu *vgpu);
void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
u32 fence, u64 value);
-/* Macros for easily accessing vGPU virtual/shadow register */
-#define vgpu_vreg(vgpu, reg) \
- (*(u32 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_vreg8(vgpu, reg) \
- (*(u8 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_vreg16(vgpu, reg) \
- (*(u16 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_vreg64(vgpu, reg) \
- (*(u64 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_sreg(vgpu, reg) \
- (*(u32 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_sreg8(vgpu, reg) \
- (*(u8 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_sreg16(vgpu, reg) \
- (*(u16 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg)))
-#define vgpu_sreg64(vgpu, reg) \
- (*(u64 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg)))
+/* Macros for easily accessing vGPU virtual/shadow registers.
+ * Explicitly separate use for a typed MMIO reg vs. a raw offset. */
+#define vgpu_vreg_t(vgpu, reg) \
+ (*(u32 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg)))
+#define vgpu_vreg(vgpu, offset) \
+ (*(u32 *)(vgpu->mmio.vreg + (offset)))
+#define vgpu_vreg64_t(vgpu, reg) \
+ (*(u64 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg)))
+#define vgpu_vreg64(vgpu, offset) \
+ (*(u64 *)(vgpu->mmio.vreg + (offset)))
+#define vgpu_sreg_t(vgpu, reg) \
+ (*(u32 *)(vgpu->mmio.sreg + i915_mmio_reg_offset(reg)))
+#define vgpu_sreg(vgpu, offset) \
+ (*(u32 *)(vgpu->mmio.sreg + (offset)))
#define for_each_active_vgpu(gvt, vgpu, id) \
idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \
@@ -549,6 +546,8 @@ struct intel_gvt_ops {
struct attribute_group ***intel_vgpu_type_groups);
int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *);
int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);
+ int (*write_protect_handler)(struct intel_vgpu *, u64, void *,
+ unsigned int);
};
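The split makes misuse a type error: vgpu_vreg_t() takes a typed i915_reg_t, whose offset it extracts with i915_mmio_reg_offset(), while vgpu_vreg() takes a raw u32 offset such as one decoded from a guest command buffer. A usage sketch (PIPECONF and PIPECONF_ENABLE are existing i915 definitions; the function itself is illustrative):

    static void vreg_usage_sketch(struct intel_vgpu *vgpu,
                                  u32 raw_offset, u32 val)
    {
        /* Typed register definition: use the _t accessor. */
        vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;

        /* Raw offset parsed from a command buffer: use the untyped one. */
        vgpu_vreg(vgpu, raw_offset) = val;
    }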
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index c982867e7c2b..92d6468daeee 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -343,13 +343,13 @@ static int pch_pp_control_mmio_write(struct intel_vgpu *vgpu,
write_vreg(vgpu, offset, p_data, bytes);
if (vgpu_vreg(vgpu, offset) & PANEL_POWER_ON) {
- vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_ON;
- vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE;
- vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN;
- vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE;
+ vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_ON;
+ vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE;
+ vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN;
+ vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE;
} else
- vgpu_vreg(vgpu, PCH_PP_STATUS) &=
+ vgpu_vreg_t(vgpu, PCH_PP_STATUS) &=
~(PP_ON | PP_SEQUENCE_POWER_DOWN
| PP_CYCLE_DELAY_ACTIVE);
return 0;
@@ -503,7 +503,7 @@ static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
} else {
vgpu_vreg(vgpu, offset) |= DDI_BUF_IS_IDLE;
if (offset == i915_mmio_reg_offset(DDI_BUF_CTL(PORT_E)))
- vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E))
+ vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E))
&= ~DP_TP_STATUS_AUTOTRAIN_DONE;
}
return 0;
@@ -521,9 +521,9 @@ static int fdi_rx_iir_mmio_write(struct intel_vgpu *vgpu,
static int fdi_auto_training_started(struct intel_vgpu *vgpu)
{
- u32 ddi_buf_ctl = vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_E));
+ u32 ddi_buf_ctl = vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_E));
u32 rx_ctl = vgpu_vreg(vgpu, _FDI_RXA_CTL);
- u32 tx_ctl = vgpu_vreg(vgpu, DP_TP_CTL(PORT_E));
+ u32 tx_ctl = vgpu_vreg_t(vgpu, DP_TP_CTL(PORT_E));
if ((ddi_buf_ctl & DDI_BUF_CTL_ENABLE) &&
(rx_ctl & FDI_RX_ENABLE) &&
@@ -564,12 +564,12 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu,
fdi_tx_check_bits = FDI_TX_ENABLE | fdi_tx_train_bits;
/* If imr bit has been masked */
- if (vgpu_vreg(vgpu, fdi_rx_imr) & fdi_iir_check_bits)
+ if (vgpu_vreg_t(vgpu, fdi_rx_imr) & fdi_iir_check_bits)
return 0;
- if (((vgpu_vreg(vgpu, fdi_tx_ctl) & fdi_tx_check_bits)
+ if (((vgpu_vreg_t(vgpu, fdi_tx_ctl) & fdi_tx_check_bits)
== fdi_tx_check_bits)
- && ((vgpu_vreg(vgpu, fdi_rx_ctl) & fdi_rx_check_bits)
+ && ((vgpu_vreg_t(vgpu, fdi_rx_ctl) & fdi_rx_check_bits)
== fdi_rx_check_bits))
return 1;
else
@@ -626,17 +626,17 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
if (ret < 0)
return ret;
if (ret)
- vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK;
+ vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK;
ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN2);
if (ret < 0)
return ret;
if (ret)
- vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK;
+ vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK;
if (offset == _FDI_RXA_CTL)
if (fdi_auto_training_started(vgpu))
- vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) |=
+ vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) |=
DP_TP_STATUS_AUTOTRAIN_DONE;
return 0;
}
@@ -657,7 +657,7 @@ static int dp_tp_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
data = (vgpu_vreg(vgpu, offset) & GENMASK(10, 8)) >> 8;
if (data == 0x2) {
status_reg = DP_TP_STATUS(index);
- vgpu_vreg(vgpu, status_reg) |= (1 << 25);
+ vgpu_vreg_t(vgpu, status_reg) |= (1 << 25);
}
return 0;
}
@@ -721,7 +721,7 @@ static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
};
write_vreg(vgpu, offset, p_data, bytes);
- vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset);
+ vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset);
set_bit(flip_event[index], vgpu->irq.flip_done_event[index]);
return 0;
@@ -742,7 +742,7 @@ static int spr_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
};
write_vreg(vgpu, offset, p_data, bytes);
- vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset);
+ vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset);
set_bit(flip_event[index], vgpu->irq.flip_done_event[index]);
return 0;
@@ -1064,9 +1064,9 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu,
static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
- if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
+ if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
SBI_OPCODE_SHIFT) == SBI_CMD_CRRD) {
- unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) &
+ unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) &
SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT;
vgpu_vreg(vgpu, offset) = read_virtual_sbi_register(vgpu,
sbi_offset);
@@ -1091,13 +1091,13 @@ static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
vgpu_vreg(vgpu, offset) = data;
- if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
+ if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
SBI_OPCODE_SHIFT) == SBI_CMD_CRWR) {
- unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) &
+ unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) &
SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT;
write_virtual_sbi_register(vgpu, sbi_offset,
- vgpu_vreg(vgpu, SBI_DATA));
+ vgpu_vreg_t(vgpu, SBI_DATA));
}
return 0;
}
@@ -1343,7 +1343,7 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
{
u32 value = *(u32 *)p_data;
u32 cmd = value & 0xff;
- u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA);
+ u32 *data0 = &vgpu_vreg_t(vgpu, GEN6_PCODE_DATA);
switch (cmd) {
case GEN9_PCODE_READ_MEM_LATENCY:
@@ -1586,7 +1586,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
}
#define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
- ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \
+ ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \
f, s, am, rm, d, r, w); \
if (ret) \
return ret; \
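The switch from INTEL_GVT_MMIO_OFFSET() to i915_mmio_reg_offset() works because every register the tables below pass in is now wrapped in _MMIO(), which builds the typed i915_reg_t. Simplified from the i915 definitions:

    typedef struct {
        u32 reg;
    } i915_reg_t;

    #define _MMIO(r) ((const i915_reg_t){ .reg = (r) })

    static inline u32 i915_mmio_reg_offset(i915_reg_t reg)
    {
        return reg.reg;  /* the sanctioned way back to a raw offset */
    }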
@@ -1654,22 +1654,22 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
-#define RING_REG(base) (base + 0x28)
+#define RING_REG(base) _MMIO((base) + 0x28)
MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
#undef RING_REG
-#define RING_REG(base) (base + 0x134)
+#define RING_REG(base) _MMIO((base) + 0x134)
MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
#undef RING_REG
-#define RING_REG(base) (base + 0x6c)
+#define RING_REG(base) _MMIO((base) + 0x6c)
MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL);
#undef RING_REG
MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL);
- MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL);
+ MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL);
MMIO_GM_RDR(CCID, D_ALL, NULL, NULL);
- MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL);
+ MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL);
MMIO_D(GEN7_CXT_SIZE, D_ALL);
MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -1679,7 +1679,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL);
/* RING MODE */
-#define RING_REG(base) (base + 0x29c)
+#define RING_REG(base) _MMIO((base) + 0x29c)
MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
ring_mode_mmio_write);
#undef RING_REG
@@ -1698,37 +1698,37 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
NULL, NULL);
MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2124), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x20e4), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
- MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x9030), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x20a0), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2420), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2430), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2434), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
/* display */
- MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_D(0x602a0, D_ALL);
+ MMIO_F(_MMIO(0x60220), 0x20, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_D(_MMIO(0x602a0), D_ALL);
- MMIO_D(0x65050, D_ALL);
- MMIO_D(0x650b4, D_ALL);
+ MMIO_D(_MMIO(0x65050), D_ALL);
+ MMIO_D(_MMIO(0x650b4), D_ALL);
- MMIO_D(0xc4040, D_ALL);
+ MMIO_D(_MMIO(0xc4040), D_ALL);
MMIO_D(DERRMR, D_ALL);
MMIO_D(PIPEDSL(PIPE_A), D_ALL);
@@ -1768,14 +1768,14 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(CURBASE(PIPE_B), D_ALL);
MMIO_D(CURBASE(PIPE_C), D_ALL);
- MMIO_D(0x700ac, D_ALL);
- MMIO_D(0x710ac, D_ALL);
- MMIO_D(0x720ac, D_ALL);
+ MMIO_D(_MMIO(0x700ac), D_ALL);
+ MMIO_D(_MMIO(0x710ac), D_ALL);
+ MMIO_D(_MMIO(0x720ac), D_ALL);
- MMIO_D(0x70090, D_ALL);
- MMIO_D(0x70094, D_ALL);
- MMIO_D(0x70098, D_ALL);
- MMIO_D(0x7009c, D_ALL);
+ MMIO_D(_MMIO(0x70090), D_ALL);
+ MMIO_D(_MMIO(0x70094), D_ALL);
+ MMIO_D(_MMIO(0x70098), D_ALL);
+ MMIO_D(_MMIO(0x7009c), D_ALL);
MMIO_D(DSPCNTR(PIPE_A), D_ALL);
MMIO_D(DSPADDR(PIPE_A), D_ALL);
@@ -1951,24 +1951,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(BLC_PWM_PCH_CTL1, D_ALL);
MMIO_D(BLC_PWM_PCH_CTL2, D_ALL);
- MMIO_D(0x48268, D_ALL);
+ MMIO_D(_MMIO(0x48268), D_ALL);
MMIO_F(PCH_GMBUS0, 4 * 4, 0, 0, 0, D_ALL, gmbus_mmio_read,
gmbus_mmio_write);
MMIO_F(PCH_GPIOA, 6 * 4, F_UNALIGN, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0xe4f00, 0x28, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0xe4f00), 0x28, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(_PCH_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+ MMIO_F(_MMIO(_PCH_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
dp_aux_ch_ctl_mmio_write);
- MMIO_F(_PCH_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+ MMIO_F(_MMIO(_PCH_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
dp_aux_ch_ctl_mmio_write);
- MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+ MMIO_F(_MMIO(_PCH_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
dp_aux_ch_ctl_mmio_write);
MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write);
- MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write);
- MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write);
+ MMIO_DH(_MMIO(_PCH_TRANSACONF), D_ALL, NULL, transconf_mmio_write);
+ MMIO_DH(_MMIO(_PCH_TRANSBCONF), D_ALL, NULL, transconf_mmio_write);
MMIO_DH(FDI_RX_IIR(PIPE_A), D_ALL, NULL, fdi_rx_iir_mmio_write);
MMIO_DH(FDI_RX_IIR(PIPE_B), D_ALL, NULL, fdi_rx_iir_mmio_write);
@@ -1980,30 +1980,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DH(FDI_RX_CTL(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status);
MMIO_DH(FDI_RX_CTL(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status);
- MMIO_D(_PCH_TRANS_HTOTAL_A, D_ALL);
- MMIO_D(_PCH_TRANS_HBLANK_A, D_ALL);
- MMIO_D(_PCH_TRANS_HSYNC_A, D_ALL);
- MMIO_D(_PCH_TRANS_VTOTAL_A, D_ALL);
- MMIO_D(_PCH_TRANS_VBLANK_A, D_ALL);
- MMIO_D(_PCH_TRANS_VSYNC_A, D_ALL);
- MMIO_D(_PCH_TRANS_VSYNCSHIFT_A, D_ALL);
-
- MMIO_D(_PCH_TRANS_HTOTAL_B, D_ALL);
- MMIO_D(_PCH_TRANS_HBLANK_B, D_ALL);
- MMIO_D(_PCH_TRANS_HSYNC_B, D_ALL);
- MMIO_D(_PCH_TRANS_VTOTAL_B, D_ALL);
- MMIO_D(_PCH_TRANS_VBLANK_B, D_ALL);
- MMIO_D(_PCH_TRANS_VSYNC_B, D_ALL);
- MMIO_D(_PCH_TRANS_VSYNCSHIFT_B, D_ALL);
-
- MMIO_D(_PCH_TRANSA_DATA_M1, D_ALL);
- MMIO_D(_PCH_TRANSA_DATA_N1, D_ALL);
- MMIO_D(_PCH_TRANSA_DATA_M2, D_ALL);
- MMIO_D(_PCH_TRANSA_DATA_N2, D_ALL);
- MMIO_D(_PCH_TRANSA_LINK_M1, D_ALL);
- MMIO_D(_PCH_TRANSA_LINK_N1, D_ALL);
- MMIO_D(_PCH_TRANSA_LINK_M2, D_ALL);
- MMIO_D(_PCH_TRANSA_LINK_N2, D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_HBLANK_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_HSYNC_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VBLANK_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VSYNC_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_A), D_ALL);
+
+ MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_HBLANK_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_HSYNC_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VBLANK_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VSYNC_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_B), D_ALL);
+
+ MMIO_D(_MMIO(_PCH_TRANSA_DATA_M1), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_DATA_N1), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_DATA_M2), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_DATA_N2), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_LINK_M1), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_LINK_N1), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_LINK_M2), D_ALL);
+ MMIO_D(_MMIO(_PCH_TRANSA_LINK_N2), D_ALL);
MMIO_D(TRANS_DP_CTL(PIPE_A), D_ALL);
MMIO_D(TRANS_DP_CTL(PIPE_B), D_ALL);
@@ -2021,38 +2021,38 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(TVIDEO_DIP_DATA(PIPE_C), D_ALL);
MMIO_D(TVIDEO_DIP_GCP(PIPE_C), D_ALL);
- MMIO_D(_FDI_RXA_MISC, D_ALL);
- MMIO_D(_FDI_RXB_MISC, D_ALL);
- MMIO_D(_FDI_RXA_TUSIZE1, D_ALL);
- MMIO_D(_FDI_RXA_TUSIZE2, D_ALL);
- MMIO_D(_FDI_RXB_TUSIZE1, D_ALL);
- MMIO_D(_FDI_RXB_TUSIZE2, D_ALL);
+ MMIO_D(_MMIO(_FDI_RXA_MISC), D_ALL);
+ MMIO_D(_MMIO(_FDI_RXB_MISC), D_ALL);
+ MMIO_D(_MMIO(_FDI_RXA_TUSIZE1), D_ALL);
+ MMIO_D(_MMIO(_FDI_RXA_TUSIZE2), D_ALL);
+ MMIO_D(_MMIO(_FDI_RXB_TUSIZE1), D_ALL);
+ MMIO_D(_MMIO(_FDI_RXB_TUSIZE2), D_ALL);
MMIO_DH(PCH_PP_CONTROL, D_ALL, NULL, pch_pp_control_mmio_write);
MMIO_D(PCH_PP_DIVISOR, D_ALL);
MMIO_D(PCH_PP_STATUS, D_ALL);
MMIO_D(PCH_LVDS, D_ALL);
- MMIO_D(_PCH_DPLL_A, D_ALL);
- MMIO_D(_PCH_DPLL_B, D_ALL);
- MMIO_D(_PCH_FPA0, D_ALL);
- MMIO_D(_PCH_FPA1, D_ALL);
- MMIO_D(_PCH_FPB0, D_ALL);
- MMIO_D(_PCH_FPB1, D_ALL);
+ MMIO_D(_MMIO(_PCH_DPLL_A), D_ALL);
+ MMIO_D(_MMIO(_PCH_DPLL_B), D_ALL);
+ MMIO_D(_MMIO(_PCH_FPA0), D_ALL);
+ MMIO_D(_MMIO(_PCH_FPA1), D_ALL);
+ MMIO_D(_MMIO(_PCH_FPB0), D_ALL);
+ MMIO_D(_MMIO(_PCH_FPB1), D_ALL);
MMIO_D(PCH_DREF_CONTROL, D_ALL);
MMIO_D(PCH_RAWCLK_FREQ, D_ALL);
MMIO_D(PCH_DPLL_SEL, D_ALL);
- MMIO_D(0x61208, D_ALL);
- MMIO_D(0x6120c, D_ALL);
+ MMIO_D(_MMIO(0x61208), D_ALL);
+ MMIO_D(_MMIO(0x6120c), D_ALL);
MMIO_D(PCH_PP_ON_DELAYS, D_ALL);
MMIO_D(PCH_PP_OFF_DELAYS, D_ALL);
- MMIO_DH(0xe651c, D_ALL, dpy_reg_mmio_read, NULL);
- MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL);
- MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL);
- MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL);
- MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL);
- MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe651c), D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe661c), D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe671c), D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe681c), D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe6c04), D_ALL, dpy_reg_mmio_read, NULL);
+ MMIO_DH(_MMIO(0xe6e1c), D_ALL, dpy_reg_mmio_read, NULL);
MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0,
PORTA_HOTPLUG_STATUS_MASK
@@ -2074,11 +2074,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(SOUTH_CHICKEN1, D_ALL);
MMIO_DH(SOUTH_CHICKEN2, D_ALL, NULL, south_chicken2_mmio_write);
- MMIO_D(_TRANSA_CHICKEN1, D_ALL);
- MMIO_D(_TRANSB_CHICKEN1, D_ALL);
+ MMIO_D(_MMIO(_TRANSA_CHICKEN1), D_ALL);
+ MMIO_D(_MMIO(_TRANSB_CHICKEN1), D_ALL);
MMIO_D(SOUTH_DSPCLK_GATE_D, D_ALL);
- MMIO_D(_TRANSA_CHICKEN2, D_ALL);
- MMIO_D(_TRANSB_CHICKEN2, D_ALL);
+ MMIO_D(_MMIO(_TRANSA_CHICKEN2), D_ALL);
+ MMIO_D(_MMIO(_TRANSB_CHICKEN2), D_ALL);
MMIO_D(ILK_DPFC_CB_BASE, D_ALL);
MMIO_D(ILK_DPFC_CONTROL, D_ALL);
@@ -2144,24 +2144,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(PREC_PAL_DATA(PIPE_C), D_ALL);
MMIO_F(PREC_PAL_GC_MAX(PIPE_C, 0), 4 * 3, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_D(0x60110, D_ALL);
- MMIO_D(0x61110, D_ALL);
- MMIO_F(0x70400, 0x40, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x71400, 0x40, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x72400, 0x40, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x70440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
- MMIO_F(0x71440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
- MMIO_F(0x72440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
- MMIO_F(0x7044c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
- MMIO_F(0x7144c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
- MMIO_F(0x7244c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_D(_MMIO(0x60110), D_ALL);
+ MMIO_D(_MMIO(0x61110), D_ALL);
+ MMIO_F(_MMIO(0x70400), 0x40, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x71400), 0x40, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x72400), 0x40, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x70440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_F(_MMIO(0x71440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_F(_MMIO(0x72440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_F(_MMIO(0x7044c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_F(_MMIO(0x7144c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
+ MMIO_F(_MMIO(0x7244c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL);
MMIO_D(PIPE_WM_LINETIME(PIPE_A), D_ALL);
MMIO_D(PIPE_WM_LINETIME(PIPE_B), D_ALL);
MMIO_D(PIPE_WM_LINETIME(PIPE_C), D_ALL);
MMIO_D(SPLL_CTL, D_ALL);
- MMIO_D(_WRPLL_CTL1, D_ALL);
- MMIO_D(_WRPLL_CTL2, D_ALL);
+ MMIO_D(_MMIO(_WRPLL_CTL1), D_ALL);
+ MMIO_D(_MMIO(_WRPLL_CTL2), D_ALL);
MMIO_D(PORT_CLK_SEL(PORT_A), D_ALL);
MMIO_D(PORT_CLK_SEL(PORT_B), D_ALL);
MMIO_D(PORT_CLK_SEL(PORT_C), D_ALL);
@@ -2172,15 +2172,15 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(TRANS_CLK_SEL(TRANSCODER_C), D_ALL);
MMIO_D(HSW_NDE_RSTWRN_OPT, D_ALL);
- MMIO_D(0x46508, D_ALL);
+ MMIO_D(_MMIO(0x46508), D_ALL);
- MMIO_D(0x49080, D_ALL);
- MMIO_D(0x49180, D_ALL);
- MMIO_D(0x49280, D_ALL);
+ MMIO_D(_MMIO(0x49080), D_ALL);
+ MMIO_D(_MMIO(0x49180), D_ALL);
+ MMIO_D(_MMIO(0x49280), D_ALL);
- MMIO_F(0x49090, 0x14, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x49190, 0x14, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x49290, 0x14, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x49090), 0x14, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x49190), 0x14, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x49290), 0x14, 0, 0, 0, D_ALL, NULL, NULL);
MMIO_D(GAMMA_MODE(PIPE_A), D_ALL);
MMIO_D(GAMMA_MODE(PIPE_B), D_ALL);
@@ -2200,7 +2200,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DH(SBI_CTL_STAT, D_ALL, NULL, sbi_ctl_mmio_write);
MMIO_D(PIXCLK_GATE, D_ALL);
- MMIO_F(_DPA_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_ALL, NULL,
+ MMIO_F(_MMIO(_DPA_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_ALL, NULL,
dp_aux_ch_ctl_mmio_write);
MMIO_DH(DDI_BUF_CTL(PORT_A), D_ALL, NULL, ddi_buf_ctl_mmio_write);
@@ -2221,24 +2221,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DH(DP_TP_STATUS(PORT_D), D_ALL, NULL, dp_tp_status_mmio_write);
MMIO_DH(DP_TP_STATUS(PORT_E), D_ALL, NULL, NULL);
- MMIO_F(_DDI_BUF_TRANS_A, 0x50, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x64e60, 0x50, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x64eC0, 0x50, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x64f20, 0x50, 0, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x64f80, 0x50, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(_DDI_BUF_TRANS_A), 0x50, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x64e60), 0x50, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x64eC0), 0x50, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x64f20), 0x50, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x64f80), 0x50, 0, 0, 0, D_ALL, NULL, NULL);
MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL);
MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL);
- MMIO_DH(_TRANS_DDI_FUNC_CTL_A, D_ALL, NULL, NULL);
- MMIO_DH(_TRANS_DDI_FUNC_CTL_B, D_ALL, NULL, NULL);
- MMIO_DH(_TRANS_DDI_FUNC_CTL_C, D_ALL, NULL, NULL);
- MMIO_DH(_TRANS_DDI_FUNC_CTL_EDP, D_ALL, NULL, NULL);
+ MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL);
+ MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL);
+ MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_C), D_ALL, NULL, NULL);
+ MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_EDP), D_ALL, NULL, NULL);
- MMIO_D(_TRANSA_MSA_MISC, D_ALL);
- MMIO_D(_TRANSB_MSA_MISC, D_ALL);
- MMIO_D(_TRANSC_MSA_MISC, D_ALL);
- MMIO_D(_TRANS_EDP_MSA_MISC, D_ALL);
+ MMIO_D(_MMIO(_TRANSA_MSA_MISC), D_ALL);
+ MMIO_D(_MMIO(_TRANSB_MSA_MISC), D_ALL);
+ MMIO_D(_MMIO(_TRANSC_MSA_MISC), D_ALL);
+ MMIO_D(_MMIO(_TRANS_EDP_MSA_MISC), D_ALL);
MMIO_DH(FORCEWAKE, D_ALL, NULL, NULL);
MMIO_D(FORCEWAKE_ACK, D_ALL);
@@ -2304,101 +2304,101 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_D(GEN6_UCGCTL1, D_ALL);
MMIO_D(GEN6_UCGCTL2, D_ALL);
- MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x4f000), 0x90, 0, 0, 0, D_ALL, NULL, NULL);
MMIO_D(GEN6_PCODE_DATA, D_ALL);
- MMIO_D(0x13812c, D_ALL);
+ MMIO_D(_MMIO(0x13812c), D_ALL);
MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL);
MMIO_D(HSW_EDRAM_CAP, D_ALL);
MMIO_D(HSW_IDICR, D_ALL);
MMIO_DH(GFX_FLSH_CNTL_GEN6, D_ALL, NULL, NULL);
- MMIO_D(0x3c, D_ALL);
- MMIO_D(0x860, D_ALL);
+ MMIO_D(_MMIO(0x3c), D_ALL);
+ MMIO_D(_MMIO(0x860), D_ALL);
MMIO_D(ECOSKPD, D_ALL);
- MMIO_D(0x121d0, D_ALL);
+ MMIO_D(_MMIO(0x121d0), D_ALL);
MMIO_D(GEN6_BLITTER_ECOSKPD, D_ALL);
- MMIO_D(0x41d0, D_ALL);
+ MMIO_D(_MMIO(0x41d0), D_ALL);
MMIO_D(GAC_ECO_BITS, D_ALL);
- MMIO_D(0x6200, D_ALL);
- MMIO_D(0x6204, D_ALL);
- MMIO_D(0x6208, D_ALL);
- MMIO_D(0x7118, D_ALL);
- MMIO_D(0x7180, D_ALL);
- MMIO_D(0x7408, D_ALL);
- MMIO_D(0x7c00, D_ALL);
+ MMIO_D(_MMIO(0x6200), D_ALL);
+ MMIO_D(_MMIO(0x6204), D_ALL);
+ MMIO_D(_MMIO(0x6208), D_ALL);
+ MMIO_D(_MMIO(0x7118), D_ALL);
+ MMIO_D(_MMIO(0x7180), D_ALL);
+ MMIO_D(_MMIO(0x7408), D_ALL);
+ MMIO_D(_MMIO(0x7c00), D_ALL);
MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write);
- MMIO_D(0x911c, D_ALL);
- MMIO_D(0x9120, D_ALL);
+ MMIO_D(_MMIO(0x911c), D_ALL);
+ MMIO_D(_MMIO(0x9120), D_ALL);
MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_D(GAB_CTL, D_ALL);
- MMIO_D(0x48800, D_ALL);
- MMIO_D(0xce044, D_ALL);
- MMIO_D(0xe6500, D_ALL);
- MMIO_D(0xe6504, D_ALL);
- MMIO_D(0xe6600, D_ALL);
- MMIO_D(0xe6604, D_ALL);
- MMIO_D(0xe6700, D_ALL);
- MMIO_D(0xe6704, D_ALL);
- MMIO_D(0xe6800, D_ALL);
- MMIO_D(0xe6804, D_ALL);
+ MMIO_D(_MMIO(0x48800), D_ALL);
+ MMIO_D(_MMIO(0xce044), D_ALL);
+ MMIO_D(_MMIO(0xe6500), D_ALL);
+ MMIO_D(_MMIO(0xe6504), D_ALL);
+ MMIO_D(_MMIO(0xe6600), D_ALL);
+ MMIO_D(_MMIO(0xe6604), D_ALL);
+ MMIO_D(_MMIO(0xe6700), D_ALL);
+ MMIO_D(_MMIO(0xe6704), D_ALL);
+ MMIO_D(_MMIO(0xe6800), D_ALL);
+ MMIO_D(_MMIO(0xe6804), D_ALL);
MMIO_D(PCH_GMBUS4, D_ALL);
MMIO_D(PCH_GMBUS5, D_ALL);
- MMIO_D(0x902c, D_ALL);
- MMIO_D(0xec008, D_ALL);
- MMIO_D(0xec00c, D_ALL);
- MMIO_D(0xec008 + 0x18, D_ALL);
- MMIO_D(0xec00c + 0x18, D_ALL);
- MMIO_D(0xec008 + 0x18 * 2, D_ALL);
- MMIO_D(0xec00c + 0x18 * 2, D_ALL);
- MMIO_D(0xec008 + 0x18 * 3, D_ALL);
- MMIO_D(0xec00c + 0x18 * 3, D_ALL);
- MMIO_D(0xec408, D_ALL);
- MMIO_D(0xec40c, D_ALL);
- MMIO_D(0xec408 + 0x18, D_ALL);
- MMIO_D(0xec40c + 0x18, D_ALL);
- MMIO_D(0xec408 + 0x18 * 2, D_ALL);
- MMIO_D(0xec40c + 0x18 * 2, D_ALL);
- MMIO_D(0xec408 + 0x18 * 3, D_ALL);
- MMIO_D(0xec40c + 0x18 * 3, D_ALL);
- MMIO_D(0xfc810, D_ALL);
- MMIO_D(0xfc81c, D_ALL);
- MMIO_D(0xfc828, D_ALL);
- MMIO_D(0xfc834, D_ALL);
- MMIO_D(0xfcc00, D_ALL);
- MMIO_D(0xfcc0c, D_ALL);
- MMIO_D(0xfcc18, D_ALL);
- MMIO_D(0xfcc24, D_ALL);
- MMIO_D(0xfd000, D_ALL);
- MMIO_D(0xfd00c, D_ALL);
- MMIO_D(0xfd018, D_ALL);
- MMIO_D(0xfd024, D_ALL);
- MMIO_D(0xfd034, D_ALL);
+ MMIO_D(_MMIO(0x902c), D_ALL);
+ MMIO_D(_MMIO(0xec008), D_ALL);
+ MMIO_D(_MMIO(0xec00c), D_ALL);
+ MMIO_D(_MMIO(0xec008 + 0x18), D_ALL);
+ MMIO_D(_MMIO(0xec00c + 0x18), D_ALL);
+ MMIO_D(_MMIO(0xec008 + 0x18 * 2), D_ALL);
+ MMIO_D(_MMIO(0xec00c + 0x18 * 2), D_ALL);
+ MMIO_D(_MMIO(0xec008 + 0x18 * 3), D_ALL);
+ MMIO_D(_MMIO(0xec00c + 0x18 * 3), D_ALL);
+ MMIO_D(_MMIO(0xec408), D_ALL);
+ MMIO_D(_MMIO(0xec40c), D_ALL);
+ MMIO_D(_MMIO(0xec408 + 0x18), D_ALL);
+ MMIO_D(_MMIO(0xec40c + 0x18), D_ALL);
+ MMIO_D(_MMIO(0xec408 + 0x18 * 2), D_ALL);
+ MMIO_D(_MMIO(0xec40c + 0x18 * 2), D_ALL);
+ MMIO_D(_MMIO(0xec408 + 0x18 * 3), D_ALL);
+ MMIO_D(_MMIO(0xec40c + 0x18 * 3), D_ALL);
+ MMIO_D(_MMIO(0xfc810), D_ALL);
+ MMIO_D(_MMIO(0xfc81c), D_ALL);
+ MMIO_D(_MMIO(0xfc828), D_ALL);
+ MMIO_D(_MMIO(0xfc834), D_ALL);
+ MMIO_D(_MMIO(0xfcc00), D_ALL);
+ MMIO_D(_MMIO(0xfcc0c), D_ALL);
+ MMIO_D(_MMIO(0xfcc18), D_ALL);
+ MMIO_D(_MMIO(0xfcc24), D_ALL);
+ MMIO_D(_MMIO(0xfd000), D_ALL);
+ MMIO_D(_MMIO(0xfd00c), D_ALL);
+ MMIO_D(_MMIO(0xfd018), D_ALL);
+ MMIO_D(_MMIO(0xfd024), D_ALL);
+ MMIO_D(_MMIO(0xfd034), D_ALL);
MMIO_DH(FPGA_DBG, D_ALL, NULL, fpga_dbg_mmio_write);
- MMIO_D(0x2054, D_ALL);
- MMIO_D(0x12054, D_ALL);
- MMIO_D(0x22054, D_ALL);
- MMIO_D(0x1a054, D_ALL);
-
- MMIO_D(0x44070, D_ALL);
- MMIO_DFH(0x215c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL);
-
- MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
- MMIO_D(0x2b00, D_BDW_PLUS);
- MMIO_D(0x2360, D_BDW_PLUS);
- MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
- MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
-
- MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_D(_MMIO(0x2054), D_ALL);
+ MMIO_D(_MMIO(0x12054), D_ALL);
+ MMIO_D(_MMIO(0x22054), D_ALL);
+ MMIO_D(_MMIO(0x1a054), D_ALL);
+
+ MMIO_D(_MMIO(0x44070), D_ALL);
+ MMIO_DFH(_MMIO(0x215c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2178), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x217c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x12178), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x1217c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+ MMIO_F(_MMIO(0x2290), 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
+ MMIO_D(_MMIO(0x2b00), D_BDW_PLUS);
+ MMIO_D(_MMIO(0x2360), D_BDW_PLUS);
+ MMIO_F(_MMIO(0x5200), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x5240), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+ MMIO_F(_MMIO(0x5280), 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+
+ MMIO_DFH(_MMIO(0x1c17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x1c178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
@@ -2412,24 +2412,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
- MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
- MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
- MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
- MMIO_DH(0x426c, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
- MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
- MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DH(_MMIO(0x4260), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+ MMIO_DH(_MMIO(0x4264), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+ MMIO_DH(_MMIO(0x4268), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+ MMIO_DH(_MMIO(0x426c), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+ MMIO_DH(_MMIO(0x4270), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+ MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL);
- MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x22178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
return 0;
}
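
The bulk of the hunks above is mechanical: every raw hexadecimal offset passed to the MMIO_* registration macros is wrapped in _MMIO(), so the tables carry typed i915_reg_t values instead of bare u32 offsets and the compiler can reject a stray integer. A minimal sketch of the machinery this leans on, assuming the i915_reg.h definitions of this era:

	/* Wrapping the offset in a one-member struct makes registers a
	 * distinct type: a plain integer no longer converts silently. */
	typedef struct {
		u32 reg;
	} i915_reg_t;

	#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })

	/* The only sanctioned way back to the raw offset. */
	static inline u32 i915_mmio_reg_offset(i915_reg_t reg)
	{
		return reg.reg;
	}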
@@ -2503,40 +2503,40 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS,
mmio_read_from_hw, NULL);
-#define RING_REG(base) (base + 0xd0)
+#define RING_REG(base) _MMIO((base) + 0xd0)
MMIO_RING_F(RING_REG, 4, F_RO, 0,
~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL,
ring_reset_ctl_write);
#undef RING_REG
-#define RING_REG(base) (base + 0x230)
+#define RING_REG(base) _MMIO((base) + 0x230)
MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write);
#undef RING_REG
-#define RING_REG(base) (base + 0x234)
+#define RING_REG(base) _MMIO((base) + 0x234)
MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS,
NULL, NULL);
#undef RING_REG
-#define RING_REG(base) (base + 0x244)
+#define RING_REG(base) _MMIO((base) + 0x244)
MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
#undef RING_REG
-#define RING_REG(base) (base + 0x370)
+#define RING_REG(base) _MMIO((base) + 0x370)
MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL);
#undef RING_REG
-#define RING_REG(base) (base + 0x3a0)
+#define RING_REG(base) _MMIO((base) + 0x3a0)
MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
#undef RING_REG
MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS);
MMIO_D(PIPEMISC(PIPE_B), D_BDW_PLUS);
MMIO_D(PIPEMISC(PIPE_C), D_BDW_PLUS);
- MMIO_D(0x1c1d0, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x1c1d0), D_BDW_PLUS);
MMIO_D(GEN6_MBCUNIT_SNPCR, D_BDW_PLUS);
MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS);
- MMIO_D(0x1c054, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x1c054), D_BDW_PLUS);
MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
@@ -2545,7 +2545,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
MMIO_D(GAMTARBMODE, D_BDW_PLUS);
-#define RING_REG(base) (base + 0x270)
+#define RING_REG(base) _MMIO((base) + 0x270)
MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL);
#undef RING_REG
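
RING_REG() is redefined before each per-engine register family and #undef'd right after, so one MMIO_RING_* line covers that offset on every engine; the change here is only that the expansion now yields a typed register, with (base) parenthesized against expression arguments. A hedged sketch of the idiom (the base table and loop are illustrative, not the real MMIO_RING_F expansion):

	/* Illustrative engine MMIO bases (render, video, blit, ...). */
	static const u32 ring_base[] = { 0x2000, 0x12000, 0x22000, 0x1a000 };

	#define RING_REG(base) _MMIO((base) + 0x244)	/* one register family */

	static void for_each_ring_reg(void (*fn)(i915_reg_t reg))
	{
		int i;

		/* Visit this family's register on every engine. */
		for (i = 0; i < ARRAY_SIZE(ring_base); i++)
			fn(RING_REG(ring_base[i]));
	}
	#undef RING_REG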
@@ -2558,10 +2558,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS);
MMIO_D(WM_MISC, D_BDW);
- MMIO_D(BDW_EDP_PSR_BASE, D_BDW);
+ MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW);
- MMIO_D(0x66c00, D_BDW_PLUS);
- MMIO_D(0x66c04, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x66c00), D_BDW_PLUS);
+ MMIO_D(_MMIO(0x66c04), D_BDW_PLUS);
MMIO_D(HSW_GTT_CACHE_EN, D_BDW_PLUS);
@@ -2569,54 +2569,54 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS);
MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS);
- MMIO_D(0xfdc, D_BDW_PLUS);
+ MMIO_D(_MMIO(0xfdc), D_BDW_PLUS);
MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xb1f0), D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xb1c0), D_BDW, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL);
- MMIO_D(0xb110, D_BDW);
+ MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_D(_MMIO(0xb110), D_BDW);
- MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
+ MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
NULL, force_nonpriv_write);
- MMIO_D(0x44484, D_BDW_PLUS);
- MMIO_D(0x4448c, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x44484), D_BDW_PLUS);
+ MMIO_D(_MMIO(0x4448c), D_BDW_PLUS);
- MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x83a4), D_BDW, F_CMD_ACCESS, NULL, NULL);
MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS);
- MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x8430), D_BDW, F_CMD_ACCESS, NULL, NULL);
- MMIO_D(0x110000, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x110000), D_BDW_PLUS);
- MMIO_D(0x48400, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x48400), D_BDW_PLUS);
- MMIO_D(0x6e570, D_BDW_PLUS);
- MMIO_D(0x65f10, D_BDW_PLUS);
+ MMIO_D(_MMIO(0x6e570), D_BDW_PLUS);
+ MMIO_D(_MMIO(0x65f10), D_BDW_PLUS);
- MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe194), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe188), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
-
- MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL);
-
- MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x2580), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+ MMIO_DFH(_MMIO(0x2248), D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+ MMIO_DFH(_MMIO(0xe220), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe230), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe240), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe260), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe270), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe280), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
return 0;
}
@@ -2632,11 +2632,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL);
- MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+ MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
dp_aux_ch_ctl_mmio_write);
- MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+ MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
dp_aux_ch_ctl_mmio_write);
- MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+ MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
dp_aux_ch_ctl_mmio_write);
/*
@@ -2647,26 +2647,26 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL,
skl_power_well_ctl_write);
- MMIO_D(0xa210, D_SKL_PLUS);
+ MMIO_D(_MMIO(0xa210), D_SKL_PLUS);
MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
- MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL);
- MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL);
- MMIO_D(0x45504, D_SKL_PLUS);
- MMIO_D(0x45520, D_SKL_PLUS);
- MMIO_D(0x46000, D_SKL_PLUS);
- MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write);
- MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write);
- MMIO_D(0x6C040, D_SKL | D_KBL);
- MMIO_D(0x6C048, D_SKL | D_KBL);
- MMIO_D(0x6C050, D_SKL | D_KBL);
- MMIO_D(0x6C044, D_SKL | D_KBL);
- MMIO_D(0x6C04C, D_SKL | D_KBL);
- MMIO_D(0x6C054, D_SKL | D_KBL);
- MMIO_D(0x6c058, D_SKL | D_KBL);
- MMIO_D(0x6c05c, D_SKL | D_KBL);
- MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL);
+ MMIO_DH(_MMIO(0x4ddc), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(0x42080), D_SKL_PLUS, NULL, NULL);
+ MMIO_D(_MMIO(0x45504), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x45520), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x46000), D_SKL_PLUS);
+ MMIO_DH(_MMIO(0x46010), D_SKL | D_KBL, NULL, skl_lcpll_write);
+ MMIO_DH(_MMIO(0x46014), D_SKL | D_KBL, NULL, skl_lcpll_write);
+ MMIO_D(_MMIO(0x6C040), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6C048), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6C050), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6C044), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6C04C), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6C054), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6c058), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6c05c), D_SKL | D_KBL);
+ MMIO_DH(_MMIO(0x6c060), D_SKL | D_KBL, dpll_status_read, NULL);
MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
@@ -2755,105 +2755,105 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
- MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL);
+ MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL);
- MMIO_D(0x70380, D_SKL_PLUS);
- MMIO_D(0x71380, D_SKL_PLUS);
- MMIO_D(0x72380, D_SKL_PLUS);
- MMIO_D(0x7039c, D_SKL_PLUS);
+ MMIO_D(_MMIO(0x70380), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x71380), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x72380), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x7039c), D_SKL_PLUS);
- MMIO_D(0x8f074, D_SKL | D_KBL);
- MMIO_D(0x8f004, D_SKL | D_KBL);
- MMIO_D(0x8f034, D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x8f004), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x8f034), D_SKL | D_KBL);
- MMIO_D(0xb11c, D_SKL | D_KBL);
+ MMIO_D(_MMIO(0xb11c), D_SKL | D_KBL);
- MMIO_D(0x51000, D_SKL | D_KBL);
- MMIO_D(0x6c00c, D_SKL_PLUS);
+ MMIO_D(_MMIO(0x51000), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6c00c), D_SKL_PLUS);
- MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
- MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
+ MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
+ MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
- MMIO_D(0xd08, D_SKL_PLUS);
- MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
- MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_D(_MMIO(0xd08), D_SKL_PLUS);
+ MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
+ MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
/* TRTT */
- MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write);
- MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write);
+ MMIO_DFH(_MMIO(0x4de0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x4de4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x4de8), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x4dec), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x4df0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(_MMIO(0x4df4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write);
+ MMIO_DH(_MMIO(0x4dfc), D_SKL | D_KBL, NULL, gen9_trtt_chicken_write);
- MMIO_D(0x45008, D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x45008), D_SKL | D_KBL);
- MMIO_D(0x46430, D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x46430), D_SKL | D_KBL);
- MMIO_D(0x46520, D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x46520), D_SKL | D_KBL);
- MMIO_D(0xc403c, D_SKL | D_KBL);
- MMIO_D(0xb004, D_SKL_PLUS);
+ MMIO_D(_MMIO(0xc403c), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0xb004), D_SKL_PLUS);
MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write);
- MMIO_D(0x65900, D_SKL_PLUS);
- MMIO_D(0x1082c0, D_SKL | D_KBL);
- MMIO_D(0x4068, D_SKL | D_KBL);
- MMIO_D(0x67054, D_SKL | D_KBL);
- MMIO_D(0x6e560, D_SKL | D_KBL);
- MMIO_D(0x6e554, D_SKL | D_KBL);
- MMIO_D(0x2b20, D_SKL | D_KBL);
- MMIO_D(0x65f00, D_SKL | D_KBL);
- MMIO_D(0x65f08, D_SKL | D_KBL);
- MMIO_D(0x320f0, D_SKL | D_KBL);
-
- MMIO_D(0x70034, D_SKL_PLUS);
- MMIO_D(0x71034, D_SKL_PLUS);
- MMIO_D(0x72034, D_SKL_PLUS);
-
- MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS);
- MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS);
- MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS);
- MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS);
- MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS);
- MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS);
-
- MMIO_D(0x44500, D_SKL_PLUS);
+ MMIO_D(_MMIO(0x65900), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x1082c0), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x4068), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x67054), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6e560), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x6e554), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x2b20), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x65f00), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x65f08), D_SKL | D_KBL);
+ MMIO_D(_MMIO(0x320f0), D_SKL | D_KBL);
+
+ MMIO_D(_MMIO(0x70034), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x71034), D_SKL_PLUS);
+ MMIO_D(_MMIO(0x72034), D_SKL_PLUS);
+
+ MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_A)), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_B)), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_C)), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_A)), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_B)), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_C)), D_SKL_PLUS);
+
+ MMIO_D(_MMIO(0x44500), D_SKL_PLUS);
MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
- MMIO_D(0x4ab8, D_KBL);
- MMIO_D(0x2248, D_SKL_PLUS | D_KBL);
+ MMIO_D(_MMIO(0x4ab8), D_KBL);
+ MMIO_D(_MMIO(0x2248), D_SKL_PLUS | D_KBL);
return 0;
}
@@ -2869,8 +2869,8 @@ static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt,
for (i = 0; i < num; i++, block++) {
if (!(device & block->device))
continue;
- if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) &&
- offset < INTEL_GVT_MMIO_OFFSET(block->offset) + block->size)
+ if (offset >= i915_mmio_reg_offset(block->offset) &&
+ offset < i915_mmio_reg_offset(block->offset) + block->size)
return block;
}
return NULL;
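
With block->offset now an i915_reg_t, the range check goes through the accessor instead of punning the struct. A distilled version of the containment test, with the block shape assumed from context:

	struct gvt_mmio_block {
		i915_reg_t offset;	/* first register of the block */
		unsigned int size;	/* block length in bytes */
	};

	static bool block_contains(const struct gvt_mmio_block *block, u32 offset)
	{
		u32 start = i915_mmio_reg_offset(block->offset);

		return offset >= start && offset < start + block->size;
	}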
@@ -2982,8 +2982,8 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) {
for (j = 0; j < block->size; j += 4) {
ret = handler(gvt,
- INTEL_GVT_MMIO_OFFSET(block->offset) + j,
- data);
+ i915_mmio_reg_offset(block->offset) + j,
+ data);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index f86983d6655b..45bab5a6290b 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1360,8 +1360,8 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvmgt_guest_info, track_node);
if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
- intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
- (void *)val, len);
+ intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
+ (void *)val, len);
}
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index f7227a3ad469..562b5ad857a4 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -117,25 +117,6 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
else
memcpy(pt, p_data, bytes);
- } else if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
- struct intel_vgpu_page_track *t;
-
- /* Since we enter the failsafe mode early during guest boot,
- * guest may not have chance to set up its ppgtt table, so
- * there should not be any wp pages for guest. Keep the wp
- * related code here in case we need to handle it in furture.
- */
- t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
- if (t) {
- /* remove write protection to prevent furture traps */
- intel_vgpu_clean_page_track(vgpu, t);
- if (read)
- intel_gvt_hypervisor_read_gpa(vgpu, pa,
- p_data, bytes);
- else
- intel_gvt_hypervisor_write_gpa(vgpu, pa,
- p_data, bytes);
- }
}
mutex_unlock(&gvt->lock);
}
@@ -168,23 +149,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
goto out;
}
- if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
- struct intel_vgpu_page_track *t;
-
- t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
- if (t) {
- ret = intel_gvt_hypervisor_read_gpa(vgpu, pa,
- p_data, bytes);
- if (ret) {
- gvt_vgpu_err("guest page read error %d, "
- "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n",
- ret, t->gfn, pa, *(u32 *)p_data,
- bytes);
- }
- goto out;
- }
- }
-
offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
if (WARN_ON(bytes > 8))
@@ -263,23 +227,6 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
goto out;
}
- if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) {
- struct intel_vgpu_page_track *t;
-
- t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
- if (t) {
- ret = t->handler(t, pa, p_data, bytes);
- if (ret) {
- gvt_err("guest page write error %d, "
- "gfn 0x%lx, pa 0x%llx, "
- "var 0x%x, len %d\n",
- ret, t->gfn, pa,
- *(u32 *)p_data, bytes);
- }
- goto out;
- }
- }
-
offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
if (WARN_ON(bytes > 8))
@@ -336,10 +283,10 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
memcpy(vgpu->mmio.vreg, mmio, info->mmio_size);
memcpy(vgpu->mmio.sreg, mmio, info->mmio_size);
- vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
+ vgpu_vreg_t(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
/* set bits 0:2 (Core C-State) to C0 */
- vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
+ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0;
vgpu->mmio.disable_warn_untrack = false;
} else {
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index 62709ac351cd..71b620875943 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -76,13 +76,6 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
int (*handler)(struct intel_gvt *gvt, u32 offset, void *data),
void *data);
-
-#define INTEL_GVT_MMIO_OFFSET(reg) ({ \
- typeof(reg) __reg = reg; \
- u32 *offset = (u32 *)&__reg; \
- *offset; \
-})
-
int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 8a52b56f0e86..74834395dd89 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -149,8 +149,41 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{ /* Terminated */ }
};
-static u32 gen9_render_mocs[I915_NUM_ENGINES][64];
-static u32 gen9_render_mocs_L3[32];
+static struct {
+ bool initialized;
+ u32 control_table[I915_NUM_ENGINES][64];
+ u32 l3cc_table[32];
+} gen9_render_mocs;
+
+static void load_render_mocs(struct drm_i915_private *dev_priv)
+{
+ i915_reg_t offset;
+ u32 regs[] = {
+ [RCS] = 0xc800,
+ [VCS] = 0xc900,
+ [VCS2] = 0xca00,
+ [BCS] = 0xcc00,
+ [VECS] = 0xcb00,
+ };
+ int ring_id, i;
+
+ for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
+ offset.reg = regs[ring_id];
+ for (i = 0; i < 64; i++) {
+ gen9_render_mocs.control_table[ring_id][i] =
+ I915_READ_FW(offset);
+ offset.reg += 4;
+ }
+ }
+
+ offset.reg = 0xb020;
+ for (i = 0; i < 32; i++) {
+ gen9_render_mocs.l3cc_table[i] =
+ I915_READ_FW(offset);
+ offset.reg += 4;
+ }
+ gen9_render_mocs.initialized = true;
+}
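
load_render_mocs() reads through I915_READ_FW(), the raw variant that does no per-access forcewake handling; the caller must hold forcewake around the whole batch, as intel_gvt_switch_mmio() does further down. A sketch of that calling convention:

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);	/* wake once */
	load_render_mocs(dev_priv);	/* many raw reads under one wakeup */
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);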
static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
{
@@ -191,17 +224,20 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
else
- vgpu_vreg(vgpu, regs[ring_id]) = 0;
+ vgpu_vreg_t(vgpu, reg) = 0;
intel_uncore_forcewake_put(dev_priv, fw);
gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
}
-static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
+static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
+ int ring_id)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv;
i915_reg_t offset, l3_offset;
+ u32 old_v, new_v;
+
u32 regs[] = {
[RCS] = 0xc800,
[VCS] = 0xc900,
@@ -211,54 +247,45 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
};
int i;
+ dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
+ if (!pre && !gen9_render_mocs.initialized)
+ load_render_mocs(dev_priv);
+
offset.reg = regs[ring_id];
for (i = 0; i < 64; i++) {
- gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
- I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset));
- offset.reg += 4;
- }
-
- if (ring_id == RCS) {
- l3_offset.reg = 0xb020;
- for (i = 0; i < 32; i++) {
- gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
- I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
- l3_offset.reg += 4;
- }
- }
-}
+ if (pre)
+ old_v = vgpu_vreg_t(pre, offset);
+ else
+ old_v = gen9_render_mocs.control_table[ring_id][i];
+ if (next)
+ new_v = vgpu_vreg_t(next, offset);
+ else
+ new_v = gen9_render_mocs.control_table[ring_id][i];
-static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
-{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- i915_reg_t offset, l3_offset;
- u32 regs[] = {
- [RCS] = 0xc800,
- [VCS] = 0xc900,
- [VCS2] = 0xca00,
- [BCS] = 0xcc00,
- [VECS] = 0xcb00,
- };
- int i;
+ if (old_v != new_v)
+ I915_WRITE_FW(offset, new_v);
- if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
- return;
-
- offset.reg = regs[ring_id];
- for (i = 0; i < 64; i++) {
- vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
- I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
offset.reg += 4;
}
if (ring_id == RCS) {
l3_offset.reg = 0xb020;
for (i = 0; i < 32; i++) {
- vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
- I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
+ if (pre)
+ old_v = vgpu_vreg_t(pre, l3_offset);
+ else
+ old_v = gen9_render_mocs.l3cc_table[i];
+ if (next)
+ new_v = vgpu_vreg_t(next, l3_offset);
+ else
+ new_v = gen9_render_mocs.l3cc_table[i];
+
+ if (old_v != new_v)
+ I915_WRITE_FW(l3_offset, new_v);
+
l3_offset.reg += 4;
}
}
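
The old load/restore pair collapses into one pass built on a single rule: pick the value the outgoing owner leaves behind and the value the incoming owner expects, with a NULL vGPU standing for the host (whose values come from the gen9_render_mocs snapshot), and only write when they differ. A distilled sketch of that rule:

	static u32 mocs_value(struct intel_vgpu *vgpu, i915_reg_t reg, u32 host_value)
	{
		/* A NULL vgpu stands for the host: use the cached snapshot. */
		return vgpu ? vgpu_vreg_t(vgpu, reg) : host_value;
	}

	static void switch_one_mocs(struct drm_i915_private *dev_priv,
				    struct intel_vgpu *pre, struct intel_vgpu *next,
				    i915_reg_t reg, u32 host_value)
	{
		u32 old_v = mocs_value(pre, reg, host_value);
		u32 new_v = mocs_value(next, reg, host_value);

		if (old_v != new_v)	/* skip MMIO writes that would be no-ops */
			I915_WRITE_FW(reg, new_v);
	}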
@@ -266,84 +293,77 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
#define CTX_CONTEXT_CONTROL_VAL 0x03
-/* Switch ring mmio values (context) from host to a vgpu. */
-static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context). */
+static void switch_mmio(struct intel_vgpu *pre,
+ struct intel_vgpu *next,
+ int ring_id)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct intel_vgpu_submission *s = &vgpu->submission;
- u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state;
- u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+ struct drm_i915_private *dev_priv;
+ struct intel_vgpu_submission *s;
+ u32 *reg_state, ctx_ctrl;
u32 inhibit_mask =
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
struct engine_mmio *mmio;
- u32 v;
-
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
- load_mocs(vgpu, ring_id);
-
- mmio = vgpu->gvt->engine_mmio_list;
- while (i915_mmio_reg_offset((mmio++)->reg)) {
- if (mmio->ring_id != ring_id)
- continue;
-
- mmio->value = I915_READ_FW(mmio->reg);
-
- /*
- * if it is an inhibit context, load in_context mmio
- * into HW by mmio write. If it is not, skip this mmio
- * write.
- */
- if (mmio->in_context &&
- (ctx_ctrl & inhibit_mask) != inhibit_mask)
- continue;
-
- if (mmio->mask)
- v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16);
- else
- v = vgpu_vreg(vgpu, mmio->reg);
-
- I915_WRITE_FW(mmio->reg, v);
-
- trace_render_mmio(vgpu->id, "load",
- i915_mmio_reg_offset(mmio->reg),
- mmio->value, v);
- }
-
- handle_tlb_pending_event(vgpu, ring_id);
-}
-
-/* Switch ring mmio values (context) from vgpu to host. */
-static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
-{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct engine_mmio *mmio;
- u32 v;
+ u32 old_v, new_v;
+ dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
- restore_mocs(vgpu, ring_id);
+ switch_mocs(pre, next, ring_id);
- mmio = vgpu->gvt->engine_mmio_list;
+ mmio = dev_priv->gvt->engine_mmio_list;
while (i915_mmio_reg_offset((mmio++)->reg)) {
if (mmio->ring_id != ring_id)
continue;
-
- vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
-
- if (mmio->mask) {
- vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
- v = mmio->value | (mmio->mask << 16);
+ /* save */
+ if (pre) {
+ vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
+ if (mmio->mask)
+ vgpu_vreg_t(pre, mmio->reg) &=
+ ~(mmio->mask << 16);
+ old_v = vgpu_vreg_t(pre, mmio->reg);
} else
- v = mmio->value;
-
- if (mmio->in_context)
- continue;
+ old_v = mmio->value = I915_READ_FW(mmio->reg);
+
+ /* restore */
+ if (next) {
+ s = &next->submission;
+ reg_state =
+ s->shadow_ctx->engine[ring_id].lrc_reg_state;
+ ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+ /*
+ * If this is an inhibit context, load the in_context mmio
+ * into HW by an mmio write; otherwise skip this mmio
+ * write.
+ */
+ if (mmio->in_context &&
+ (ctx_ctrl & inhibit_mask) != inhibit_mask)
+ continue;
+
+ if (mmio->mask)
+ new_v = vgpu_vreg_t(next, mmio->reg) |
+ (mmio->mask << 16);
+ else
+ new_v = vgpu_vreg_t(next, mmio->reg);
+ } else {
+ if (mmio->in_context)
+ continue;
+ if (mmio->mask)
+ new_v = mmio->value | (mmio->mask << 16);
+ else
+ new_v = mmio->value;
+ }
- I915_WRITE_FW(mmio->reg, v);
+ I915_WRITE_FW(mmio->reg, new_v);
- trace_render_mmio(vgpu->id, "restore",
+ trace_render_mmio(pre ? pre->id : 0,
+ next ? next->id : 0,
+ "switch",
i915_mmio_reg_offset(mmio->reg),
- mmio->value, v);
+ old_v, new_v);
}
+
+ if (next)
+ handle_tlb_pending_event(next, ring_id);
}
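
Folding the two directions into switch_mmio() leaves the exported intel_gvt_switch_mmio() with three shapes, since either vGPU pointer may be NULL; the middle one is the new fast path that avoids a round trip through host state. Call-site sketch (the scheduler context is assumed):

	intel_gvt_switch_mmio(NULL, vgpu, ring_id);	/* host -> vGPU */
	intel_gvt_switch_mmio(prev, vgpu, ring_id);	/* vGPU -> vGPU, one pass */
	intel_gvt_switch_mmio(vgpu, NULL, ring_id);	/* vGPU -> host */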
/**
@@ -374,17 +394,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
* handle forcewake manually.
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
- /**
- * TODO: Optimize for vGPU to vGPU switch by merging
- * switch_mmio_to_host() and switch_mmio_to_vgpu().
- */
- if (pre)
- switch_mmio_to_host(pre, ring_id);
-
- if (next)
- switch_mmio_to_vgpu(next, ring_id);
-
+ switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 8c150381d9a4..7a2511538f34 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -330,13 +330,14 @@ TRACE_EVENT(inject_msi,
);
TRACE_EVENT(render_mmio,
- TP_PROTO(int id, char *action, unsigned int reg,
+ TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
- TP_ARGS(id, action, reg, new_val, old_val),
+ TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
TP_STRUCT__entry(
- __field(int, id)
+ __field(int, old_id)
+ __field(int, new_id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(unsigned int, reg)
__field(unsigned int, old_val)
@@ -344,15 +345,17 @@ TRACE_EVENT(render_mmio,
),
TP_fast_assign(
- __entry->id = id;
+ __entry->old_id = old_id;
+ __entry->new_id = new_id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
__entry->reg = reg;
__entry->old_val = old_val;
__entry->new_val = new_val;
),
- TP_printk("VM%u %s reg %x, old %08x new %08x\n",
- __entry->id, __entry->buf, __entry->reg,
+ TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n",
+ __entry->old_id, __entry->new_id,
+ __entry->buf, __entry->reg,
__entry->old_val, __entry->new_val)
);
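
With both ids carried in the event, one "switch" record replaces the old load/restore pair; id 0 marks the host side at the call site shown earlier. A hypothetical firing and the line TP_printk produces from it:

	/* Values are illustrative. */
	trace_render_mmio(1, 2, "switch", 0x2098, 0x00000000, 0x00010001);
	/* -> VM1 -> VM2 switch reg 2098, old 00000000 new 00010001 */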
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 39926176fbeb..4688619f6a1c 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -38,25 +38,25 @@
void populate_pvinfo_page(struct intel_vgpu *vgpu)
{
/* setup the ballooning information */
- vgpu_vreg64(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
- vgpu_vreg(vgpu, vgtif_reg(version_major)) = 1;
- vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0;
- vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0;
- vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
+ vgpu_vreg64_t(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
+ vgpu_vreg_t(vgpu, vgtif_reg(version_major)) = 1;
+ vgpu_vreg_t(vgpu, vgtif_reg(version_minor)) = 0;
+ vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0;
+ vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
- vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
- vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
+ vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
+ vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
- vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
+ vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
vgpu_aperture_gmadr_base(vgpu);
- vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
+ vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
vgpu_aperture_sz(vgpu);
- vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) =
+ vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) =
vgpu_hidden_gmadr_base(vgpu);
- vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) =
+ vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) =
vgpu_hidden_sz(vgpu);
- vgpu_vreg(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu);
+ vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu);
gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id);
gvt_dbg_core("aperture base [GMADR] 0x%llx size 0x%llx\n",
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d8c6ec3cca71..e968aeae1d84 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -37,40 +37,21 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
return to_i915(node->minor->dev);
}
-static __always_inline void seq_print_param(struct seq_file *m,
- const char *name,
- const char *type,
- const void *x)
-{
- if (!__builtin_strcmp(type, "bool"))
- seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
- else if (!__builtin_strcmp(type, "int"))
- seq_printf(m, "i915.%s=%d\n", name, *(const int *)x);
- else if (!__builtin_strcmp(type, "unsigned int"))
- seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
- else if (!__builtin_strcmp(type, "char *"))
- seq_printf(m, "i915.%s=%s\n", name, *(const char **)x);
- else
- BUILD_BUG();
-}
-
static int i915_capabilities(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const struct intel_device_info *info = INTEL_INFO(dev_priv);
+ struct drm_printer p = drm_seq_file_printer(m);
seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv));
seq_printf(m, "platform: %s\n", intel_platform_name(info->platform));
seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv));
-#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x))
- DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
+ intel_device_info_dump_flags(info, &p);
+ intel_device_info_dump_runtime(info, &p);
kernel_param_lock(THIS_MODULE);
-#define PRINT_PARAM(T, x, ...) seq_print_param(m, #x, #T, &i915_modparams.x);
- I915_PARAMS_FOR_EACH(PRINT_PARAM);
-#undef PRINT_PARAM
+ i915_params_dump(&i915_modparams, &p);
kernel_param_unlock(THIS_MODULE);
return 0;
@@ -1601,20 +1582,23 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused)
static int i915_fbc_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
+ struct intel_fbc *fbc = &dev_priv->fbc;
- if (!HAS_FBC(dev_priv)) {
- seq_puts(m, "FBC unsupported on this chipset\n");
- return 0;
- }
+ if (!HAS_FBC(dev_priv))
+ return -ENODEV;
intel_runtime_pm_get(dev_priv);
- mutex_lock(&dev_priv->fbc.lock);
+ mutex_lock(&fbc->lock);
if (intel_fbc_is_active(dev_priv))
seq_puts(m, "FBC enabled\n");
else
- seq_printf(m, "FBC disabled: %s\n",
- dev_priv->fbc.no_fbc_reason);
+ seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason);
+
+ if (fbc->work.scheduled)
+ seq_printf(m, "FBC worker scheduled on vblank %u, now %llu\n",
+ fbc->work.scheduled_vblank,
+ drm_crtc_vblank_count(&fbc->crtc->base));
if (intel_fbc_is_active(dev_priv)) {
u32 mask;
@@ -1634,7 +1618,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
seq_printf(m, "Compressing: %s\n", yesno(mask));
}
- mutex_unlock(&dev_priv->fbc.lock);
+ mutex_unlock(&fbc->lock);
intel_runtime_pm_put(dev_priv);
return 0;
@@ -1681,10 +1665,8 @@ static int i915_ips_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- if (!HAS_IPS(dev_priv)) {
- seq_puts(m, "not supported\n");
- return 0;
- }
+ if (!HAS_IPS(dev_priv))
+ return -ENODEV;
intel_runtime_pm_get(dev_priv);
@@ -1770,10 +1752,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
int gpu_freq, ia_freq;
unsigned int max_gpu_freq, min_gpu_freq;
- if (!HAS_LLC(dev_priv)) {
- seq_puts(m, "unsupported on this chipset\n");
- return 0;
- }
+ if (!HAS_LLC(dev_priv))
+ return -ENODEV;
intel_runtime_pm_get(dev_priv);
@@ -2253,8 +2233,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_printer p;
- if (!HAS_HUC_UCODE(dev_priv))
- return 0;
+ if (!HAS_HUC(dev_priv))
+ return -ENODEV;
p = drm_seq_file_printer(m);
intel_uc_fw_dump(&dev_priv->huc.fw, &p);
@@ -2272,8 +2252,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
struct drm_printer p;
u32 tmp, i;
- if (!HAS_GUC_UCODE(dev_priv))
- return 0;
+ if (!HAS_GUC(dev_priv))
+ return -ENODEV;
p = drm_seq_file_printer(m);
intel_uc_fw_dump(&dev_priv->guc.fw, &p);
@@ -2346,29 +2326,16 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tTotal: %llu\n", tot);
}
-static bool check_guc_submission(struct seq_file *m)
-{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
- const struct intel_guc *guc = &dev_priv->guc;
-
- if (!guc->execbuf_client) {
- seq_printf(m, "GuC submission %s\n",
- HAS_GUC_SCHED(dev_priv) ?
- "disabled" :
- "not supported");
- return false;
- }
-
- return true;
-}
-
static int i915_guc_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
const struct intel_guc *guc = &dev_priv->guc;
- if (!check_guc_submission(m))
- return 0;
+ if (!USES_GUC_SUBMISSION(dev_priv))
+ return -ENODEV;
+
+ GEM_BUG_ON(!guc->execbuf_client);
+ GEM_BUG_ON(!guc->preempt_client);
seq_printf(m, "Doorbell map:\n");
seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
@@ -2395,8 +2362,8 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
unsigned int tmp;
int index;
- if (!check_guc_submission(m))
- return 0;
+ if (!USES_GUC_SUBMISSION(dev_priv))
+ return -ENODEV;
for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
struct intel_engine_cs *engine;
@@ -2449,6 +2416,9 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
u32 *log;
int i = 0;
+ if (!HAS_GUC(dev_priv))
+ return -ENODEV;
+
if (dump_load_err)
obj = dev_priv->guc.load_err_log;
else if (dev_priv->guc.log.vma)
@@ -2480,6 +2450,9 @@ static int i915_guc_log_control_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
+ if (!HAS_GUC(dev_priv))
+ return -ENODEV;
+
if (!dev_priv->guc.log.vma)
return -EINVAL;
@@ -2493,6 +2466,9 @@ static int i915_guc_log_control_set(void *data, u64 val)
struct drm_i915_private *dev_priv = data;
int ret;
+ if (!HAS_GUC(dev_priv))
+ return -ENODEV;
+
if (!dev_priv->guc.log.vma)
return -EINVAL;
@@ -2543,10 +2519,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
enum pipe pipe;
bool enabled = false;
- if (!HAS_PSR(dev_priv)) {
- seq_puts(m, "PSR not supported\n");
- return 0;
- }
+ if (!HAS_PSR(dev_priv))
+ return -ENODEV;
intel_runtime_pm_get(dev_priv);
@@ -2785,10 +2759,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct intel_csr *csr;
- if (!HAS_CSR(dev_priv)) {
- seq_puts(m, "not supported\n");
- return 0;
- }
+ if (!HAS_CSR(dev_priv))
+ return -ENODEV;
csr = &dev_priv->csr;
@@ -3324,7 +3296,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused)
int plane;
if (INTEL_GEN(dev_priv) < 9)
- return 0;
+ return -ENODEV;
drm_modeset_lock_all(dev);
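
The debugfs conversions above all follow one pattern: a handler for absent hardware now fails with -ENODEV instead of succeeding while printing "not supported", so tooling can tell "feature absent" apart from "feature present but disabled". A minimal sketch (the handler name is illustrative):

	static int i915_feature_status(struct seq_file *m, void *unused)
	{
		struct drm_i915_private *dev_priv = node_to_i915(m->private);

		if (!HAS_FBC(dev_priv))
			return -ENODEV;		/* feature absent: the read fails */

		seq_printf(m, "FBC %s\n",
			   intel_fbc_is_active(dev_priv) ? "enabled" : "disabled");
		return 0;
	}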
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ca9f4b2862eb..6c8da9d20c33 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -931,8 +931,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
intel_display_crc_init(dev_priv);
- intel_device_info_dump(dev_priv);
-
intel_detect_preproduction_hw(dev_priv);
return 0;
@@ -1084,7 +1082,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
if (i915_inject_load_failure())
return -ENODEV;
- intel_device_info_runtime_init(dev_priv);
+ intel_device_info_runtime_init(mkwrite_device_info(dev_priv));
intel_sanitize_options(dev_priv);
@@ -1294,6 +1292,21 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
i915_gem_shrinker_unregister(dev_priv);
}
+static void i915_welcome_messages(struct drm_i915_private *dev_priv)
+{
+ if (drm_debug & DRM_UT_DRIVER) {
+ struct drm_printer p = drm_debug_printer("i915 device info:");
+
+ intel_device_info_dump(&dev_priv->info, &p);
+ intel_device_info_dump_runtime(&dev_priv->info, &p);
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
+ DRM_INFO("DRM_I915_DEBUG enabled\n");
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ DRM_INFO("DRM_I915_DEBUG_GEM enabled\n");
+}
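
Both dump paths now go through struct drm_printer, which decouples the device-info helpers from their output sink: the same intel_device_info_dump() writes to the kernel debug log here and to a debugfs seq_file in i915_capabilities(). A sketch of the two sinks:

	struct drm_printer p;

	p = drm_debug_printer("i915 device info:");	/* sink: DRM debug log */
	drm_printf(&p, "platform: %s\n", "SKYLAKE");

	p = drm_seq_file_printer(m);	/* sink: the seq_file of a debugfs node */
	drm_printf(&p, "platform: %s\n", "SKYLAKE");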
+
/**
* i915_driver_load - setup chip and create an initial config
* @pdev: PCI device
@@ -1379,13 +1392,10 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
intel_init_ipc(dev_priv);
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
- DRM_INFO("DRM_I915_DEBUG enabled\n");
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- DRM_INFO("DRM_I915_DEBUG_GEM enabled\n");
-
intel_runtime_pm_put(dev_priv);
+ i915_welcome_messages(dev_priv);
+
return 0;
out_cleanup_hw:
@@ -1924,9 +1934,6 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
goto taint;
}
- i915_gem_reset(i915);
- intel_overlay_reset(i915);
-
/* Ok, now get things going again... */
/*
@@ -1939,6 +1946,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
goto error;
}
+ i915_gem_reset(i915);
+ intel_overlay_reset(i915);
+
/*
* Next we need to restore the context, but we don't use those
* yet either...
@@ -2011,19 +2021,19 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+ active_request = i915_gem_reset_prepare_engine(engine);
+ if (IS_ERR_OR_NULL(active_request)) {
+ /* Either the previous reset failed, or we pardon the reset. */
+ ret = PTR_ERR(active_request);
+ goto out;
+ }
+
if (!(flags & I915_RESET_QUIET)) {
dev_notice(engine->i915->drm.dev,
"Resetting %s after gpu hang\n", engine->name);
}
error->reset_engine_count[engine->id]++;
- active_request = i915_gem_reset_prepare_engine(engine);
- if (IS_ERR(active_request)) {
- DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n");
- ret = PTR_ERR(active_request);
- goto out;
- }
-
if (!engine->i915->guc.execbuf_client)
ret = intel_gt_reset_engine(engine->i915, engine);
else
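
Hoisting the prepare step above the user notice also changes the NULL case: IS_ERR_OR_NULL() now catches both outcomes, and since PTR_ERR(NULL) evaluates to 0, a NULL active_request ("pardon the reset") exits with success while a real ERR_PTR propagates its errno. Distilled, with the request type assumed for this kernel:

	struct drm_i915_gem_request *rq;

	rq = i915_gem_reset_prepare_engine(engine);
	if (IS_ERR_OR_NULL(rq))
		return PTR_ERR(rq);	/* 0 when rq == NULL, -errno otherwise */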
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1aba5657f5f0..caebd5825279 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,12 +56,15 @@
#include "i915_reg.h"
#include "i915_utils.h"
-#include "intel_uncore.h"
#include "intel_bios.h"
+#include "intel_device_info.h"
+#include "intel_display.h"
#include "intel_dpll_mgr.h"
-#include "intel_uc.h"
#include "intel_lrc.h"
+#include "intel_opregion.h"
#include "intel_ringbuffer.h"
+#include "intel_uncore.h"
+#include "intel_uc.h"
#include "i915_gem.h"
#include "i915_gem_context.h"
@@ -80,8 +83,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20171214"
-#define DRIVER_TIMESTAMP 1513282202
+#define DRIVER_DATE "20171222"
+#define DRIVER_TIMESTAMP 1513971710
/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
* WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -243,174 +246,6 @@ static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1,
return clamp_u64_to_fixed16(interm_sum);
}
-static inline const char *yesno(bool v)
-{
- return v ? "yes" : "no";
-}
-
-static inline const char *onoff(bool v)
-{
- return v ? "on" : "off";
-}
-
-static inline const char *enableddisabled(bool v)
-{
- return v ? "enabled" : "disabled";
-}
-
-enum pipe {
- INVALID_PIPE = -1,
- PIPE_A = 0,
- PIPE_B,
- PIPE_C,
- _PIPE_EDP,
- I915_MAX_PIPES = _PIPE_EDP
-};
-#define pipe_name(p) ((p) + 'A')
-
-enum transcoder {
- TRANSCODER_A = 0,
- TRANSCODER_B,
- TRANSCODER_C,
- TRANSCODER_EDP,
- TRANSCODER_DSI_A,
- TRANSCODER_DSI_C,
- I915_MAX_TRANSCODERS
-};
-
-static inline const char *transcoder_name(enum transcoder transcoder)
-{
- switch (transcoder) {
- case TRANSCODER_A:
- return "A";
- case TRANSCODER_B:
- return "B";
- case TRANSCODER_C:
- return "C";
- case TRANSCODER_EDP:
- return "EDP";
- case TRANSCODER_DSI_A:
- return "DSI A";
- case TRANSCODER_DSI_C:
- return "DSI C";
- default:
- return "<invalid>";
- }
-}
-
-static inline bool transcoder_is_dsi(enum transcoder transcoder)
-{
- return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C;
-}
-
-/*
- * Global legacy plane identifier. Valid only for primary/sprite
- * planes on pre-g4x, and only for primary planes on g4x-bdw.
- */
-enum i9xx_plane_id {
- PLANE_A,
- PLANE_B,
- PLANE_C,
-};
-#define plane_name(p) ((p) + 'A')
-
-#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A')
-
-/*
- * Per-pipe plane identifier.
- * I915_MAX_PLANES in the enum below is the maximum (across all platforms)
- * number of planes per CRTC. Not all platforms really have this many planes,
- * which means some arrays of size I915_MAX_PLANES may have unused entries
- * between the topmost sprite plane and the cursor plane.
- *
- * This is expected to be passed to various register macros
- * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care.
- */
-enum plane_id {
- PLANE_PRIMARY,
- PLANE_SPRITE0,
- PLANE_SPRITE1,
- PLANE_SPRITE2,
- PLANE_CURSOR,
- I915_MAX_PLANES,
-};
-
-#define for_each_plane_id_on_crtc(__crtc, __p) \
- for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
- for_each_if ((__crtc)->plane_ids_mask & BIT(__p))
-
-enum port {
- PORT_NONE = -1,
- PORT_A = 0,
- PORT_B,
- PORT_C,
- PORT_D,
- PORT_E,
- I915_MAX_PORTS
-};
-#define port_name(p) ((p) + 'A')
-
-#define I915_NUM_PHYS_VLV 2
-
-enum dpio_channel {
- DPIO_CH0,
- DPIO_CH1
-};
-
-enum dpio_phy {
- DPIO_PHY0,
- DPIO_PHY1,
- DPIO_PHY2,
-};
-
-enum intel_display_power_domain {
- POWER_DOMAIN_PIPE_A,
- POWER_DOMAIN_PIPE_B,
- POWER_DOMAIN_PIPE_C,
- POWER_DOMAIN_PIPE_A_PANEL_FITTER,
- POWER_DOMAIN_PIPE_B_PANEL_FITTER,
- POWER_DOMAIN_PIPE_C_PANEL_FITTER,
- POWER_DOMAIN_TRANSCODER_A,
- POWER_DOMAIN_TRANSCODER_B,
- POWER_DOMAIN_TRANSCODER_C,
- POWER_DOMAIN_TRANSCODER_EDP,
- POWER_DOMAIN_TRANSCODER_DSI_A,
- POWER_DOMAIN_TRANSCODER_DSI_C,
- POWER_DOMAIN_PORT_DDI_A_LANES,
- POWER_DOMAIN_PORT_DDI_B_LANES,
- POWER_DOMAIN_PORT_DDI_C_LANES,
- POWER_DOMAIN_PORT_DDI_D_LANES,
- POWER_DOMAIN_PORT_DDI_E_LANES,
- POWER_DOMAIN_PORT_DDI_A_IO,
- POWER_DOMAIN_PORT_DDI_B_IO,
- POWER_DOMAIN_PORT_DDI_C_IO,
- POWER_DOMAIN_PORT_DDI_D_IO,
- POWER_DOMAIN_PORT_DDI_E_IO,
- POWER_DOMAIN_PORT_DSI,
- POWER_DOMAIN_PORT_CRT,
- POWER_DOMAIN_PORT_OTHER,
- POWER_DOMAIN_VGA,
- POWER_DOMAIN_AUDIO,
- POWER_DOMAIN_PLLS,
- POWER_DOMAIN_AUX_A,
- POWER_DOMAIN_AUX_B,
- POWER_DOMAIN_AUX_C,
- POWER_DOMAIN_AUX_D,
- POWER_DOMAIN_GMBUS,
- POWER_DOMAIN_MODESET,
- POWER_DOMAIN_GT_IRQ,
- POWER_DOMAIN_INIT,
-
- POWER_DOMAIN_NUM,
-};
-
-#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
-#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \
- ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER)
-#define POWER_DOMAIN_TRANSCODER(tran) \
- ((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \
- (tran) + POWER_DOMAIN_TRANSCODER_A)
-
enum hpd_pin {
HPD_NONE = 0,
HPD_TV = HPD_NONE, /* TV is known to be unreliable */
@@ -472,121 +307,6 @@ struct i915_hotplug {
I915_GEM_DOMAIN_INSTRUCTION | \
I915_GEM_DOMAIN_VERTEX)
-#define for_each_pipe(__dev_priv, __p) \
- for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++)
-#define for_each_pipe_masked(__dev_priv, __p, __mask) \
- for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
- for_each_if ((__mask) & (1 << (__p)))
-#define for_each_universal_plane(__dev_priv, __pipe, __p) \
- for ((__p) = 0; \
- (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \
- (__p)++)
-#define for_each_sprite(__dev_priv, __p, __s) \
- for ((__s) = 0; \
- (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \
- (__s)++)
-
-#define for_each_port_masked(__port, __ports_mask) \
- for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \
- for_each_if ((__ports_mask) & (1 << (__port)))
-
-#define for_each_crtc(dev, crtc) \
- list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head)
-
-#define for_each_intel_plane(dev, intel_plane) \
- list_for_each_entry(intel_plane, \
- &(dev)->mode_config.plane_list, \
- base.head)
-
-#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \
- list_for_each_entry(intel_plane, \
- &(dev)->mode_config.plane_list, \
- base.head) \
- for_each_if ((plane_mask) & \
- (1 << drm_plane_index(&intel_plane->base)))
-
-#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \
- list_for_each_entry(intel_plane, \
- &(dev)->mode_config.plane_list, \
- base.head) \
- for_each_if ((intel_plane)->pipe == (intel_crtc)->pipe)
-
-#define for_each_intel_crtc(dev, intel_crtc) \
- list_for_each_entry(intel_crtc, \
- &(dev)->mode_config.crtc_list, \
- base.head)
-
-#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \
- list_for_each_entry(intel_crtc, \
- &(dev)->mode_config.crtc_list, \
- base.head) \
- for_each_if ((crtc_mask) & (1 << drm_crtc_index(&intel_crtc->base)))
-
-#define for_each_intel_encoder(dev, intel_encoder) \
- list_for_each_entry(intel_encoder, \
- &(dev)->mode_config.encoder_list, \
- base.head)
-
-#define for_each_intel_connector_iter(intel_connector, iter) \
- while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter))))
-
-#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
- list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
- for_each_if ((intel_encoder)->base.crtc == (__crtc))
-
-#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \
- list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \
- for_each_if ((intel_connector)->base.encoder == (__encoder))
-
-#define for_each_power_domain(domain, mask) \
- for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \
- for_each_if (BIT_ULL(domain) & (mask))
-
-#define for_each_power_well(__dev_priv, __power_well) \
- for ((__power_well) = (__dev_priv)->power_domains.power_wells; \
- (__power_well) - (__dev_priv)->power_domains.power_wells < \
- (__dev_priv)->power_domains.power_well_count; \
- (__power_well)++)
-
-#define for_each_power_well_rev(__dev_priv, __power_well) \
- for ((__power_well) = (__dev_priv)->power_domains.power_wells + \
- (__dev_priv)->power_domains.power_well_count - 1; \
- (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \
- (__power_well)--)
-
-#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \
- for_each_power_well(__dev_priv, __power_well) \
- for_each_if ((__power_well)->domains & (__domain_mask))
-
-#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \
- for_each_power_well_rev(__dev_priv, __power_well) \
- for_each_if ((__power_well)->domains & (__domain_mask))
-
-#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \
- for ((__i) = 0; \
- (__i) < (__state)->base.dev->mode_config.num_total_plane && \
- ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
- (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
- (__i)++) \
- for_each_if (plane)
-
-#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \
- for ((__i) = 0; \
- (__i) < (__state)->base.dev->mode_config.num_crtc && \
- ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \
- (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \
- (__i)++) \
- for_each_if (crtc)
-
-#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \
- for ((__i) = 0; \
- (__i) < (__state)->base.dev->mode_config.num_total_plane && \
- ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
- (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \
- (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
- (__i)++) \
- for_each_if (plane)
-
struct drm_i915_private;
struct i915_mm_struct;
struct i915_mmu_object;
@@ -623,20 +343,6 @@ struct drm_i915_file_private {
atomic_t context_bans;
};
-/* Used by dp and fdi links */
-struct intel_link_m_n {
- uint32_t tu;
- uint32_t gmch_m;
- uint32_t gmch_n;
- uint32_t link_m;
- uint32_t link_n;
-};
-
-void intel_link_compute_m_n(int bpp, int nlanes,
- int pixel_clock, int link_clock,
- struct intel_link_m_n *m_n,
- bool reduce_m_n);
-
/* Interface history:
*
* 1.1: Original.
@@ -651,27 +357,6 @@ void intel_link_compute_m_n(int bpp, int nlanes,
#define DRIVER_MINOR 6
#define DRIVER_PATCHLEVEL 0
-struct opregion_header;
-struct opregion_acpi;
-struct opregion_swsci;
-struct opregion_asle;
-
-struct intel_opregion {
- struct opregion_header *header;
- struct opregion_acpi *acpi;
- struct opregion_swsci *swsci;
- u32 swsci_gbda_sub_functions;
- u32 swsci_sbcb_sub_functions;
- struct opregion_asle *asle;
- void *rvda;
- void *vbt_firmware;
- const void *vbt;
- u32 vbt_size;
- u32 *lid_state;
- struct work_struct asle_work;
-};
-#define OPREGION_SIZE (8*1024)
-
struct intel_overlay;
struct intel_overlay_error_state;
@@ -764,137 +449,6 @@ struct intel_csr {
uint32_t allowed_dc_mask;
};
-#define DEV_INFO_FOR_EACH_FLAG(func) \
- func(is_mobile); \
- func(is_lp); \
- func(is_alpha_support); \
- /* Keep has_* in alphabetical order */ \
- func(has_64bit_reloc); \
- func(has_aliasing_ppgtt); \
- func(has_csr); \
- func(has_ddi); \
- func(has_dp_mst); \
- func(has_reset_engine); \
- func(has_fbc); \
- func(has_fpga_dbg); \
- func(has_full_ppgtt); \
- func(has_full_48bit_ppgtt); \
- func(has_gmch_display); \
- func(has_guc); \
- func(has_guc_ct); \
- func(has_hotplug); \
- func(has_l3_dpf); \
- func(has_llc); \
- func(has_logical_ring_contexts); \
- func(has_logical_ring_preemption); \
- func(has_overlay); \
- func(has_pooled_eu); \
- func(has_psr); \
- func(has_rc6); \
- func(has_rc6p); \
- func(has_resource_streamer); \
- func(has_runtime_pm); \
- func(has_snoop); \
- func(unfenced_needs_alignment); \
- func(cursor_needs_physical); \
- func(hws_needs_physical); \
- func(overlay_needs_physical); \
- func(supports_tv); \
- func(has_ipc);
-
-struct sseu_dev_info {
- u8 slice_mask;
- u8 subslice_mask;
- u8 eu_total;
- u8 eu_per_subslice;
- u8 min_eu_in_pool;
- /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
- u8 subslice_7eu[3];
- u8 has_slice_pg:1;
- u8 has_subslice_pg:1;
- u8 has_eu_pg:1;
-};
-
-static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
-{
- return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
-}
-
-/* Keep in gen based order, and chronological order within a gen */
-enum intel_platform {
- INTEL_PLATFORM_UNINITIALIZED = 0,
- INTEL_I830,
- INTEL_I845G,
- INTEL_I85X,
- INTEL_I865G,
- INTEL_I915G,
- INTEL_I915GM,
- INTEL_I945G,
- INTEL_I945GM,
- INTEL_G33,
- INTEL_PINEVIEW,
- INTEL_I965G,
- INTEL_I965GM,
- INTEL_G45,
- INTEL_GM45,
- INTEL_IRONLAKE,
- INTEL_SANDYBRIDGE,
- INTEL_IVYBRIDGE,
- INTEL_VALLEYVIEW,
- INTEL_HASWELL,
- INTEL_BROADWELL,
- INTEL_CHERRYVIEW,
- INTEL_SKYLAKE,
- INTEL_BROXTON,
- INTEL_KABYLAKE,
- INTEL_GEMINILAKE,
- INTEL_COFFEELAKE,
- INTEL_CANNONLAKE,
- INTEL_MAX_PLATFORMS
-};
-
-struct intel_device_info {
- u16 device_id;
- u16 gen_mask;
-
- u8 gen;
- u8 gt; /* GT number, 0 if undefined */
- u8 num_rings;
- u8 ring_mask; /* Rings supported by the HW */
-
- enum intel_platform platform;
- u32 platform_mask;
-
- u32 display_mmio_offset;
-
- u8 num_pipes;
- u8 num_sprites[I915_MAX_PIPES];
- u8 num_scalers[I915_MAX_PIPES];
-
- unsigned int page_sizes; /* page sizes supported by the HW */
-
-#define DEFINE_FLAG(name) u8 name:1
- DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
-#undef DEFINE_FLAG
- u16 ddb_size; /* in blocks */
-
- /* Register offsets for the various display pipes and transcoders */
- int pipe_offsets[I915_MAX_TRANSCODERS];
- int trans_offsets[I915_MAX_TRANSCODERS];
- int palette_offsets[I915_MAX_PIPES];
- int cursor_offsets[I915_MAX_PIPES];
-
- /* Slice/subslice/EU info */
- struct sseu_dev_info sseu;
-
- u32 cs_timestamp_frequency_khz;
-
- struct color_luts {
- u16 degamma_lut_size;
- u16 gamma_lut_size;
- } color;
-};
-
struct intel_display_error_state;
struct i915_gpu_state {
@@ -948,6 +502,7 @@ struct i915_gpu_state {
struct drm_i915_error_engine {
int engine_id;
/* Software tracked state */
+ bool idle;
bool waiting;
int num_waiters;
unsigned long hangcheck_timestamp;
@@ -2405,6 +1960,9 @@ struct drm_i915_private {
*/
struct workqueue_struct *wq;
+ /* ordered wq for modesets */
+ struct workqueue_struct *modeset_wq;
+
/* Display functions */
struct drm_i915_display_funcs display;
@@ -4111,41 +3669,6 @@ bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv,
bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv,
enum port port);
-
-/* intel_opregion.c */
-#ifdef CONFIG_ACPI
-extern int intel_opregion_setup(struct drm_i915_private *dev_priv);
-extern void intel_opregion_register(struct drm_i915_private *dev_priv);
-extern void intel_opregion_unregister(struct drm_i915_private *dev_priv);
-extern void intel_opregion_asle_intr(struct drm_i915_private *dev_priv);
-extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
- bool enable);
-extern int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv,
- pci_power_t state);
-extern int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv);
-#else
-static inline int intel_opregion_setup(struct drm_i915_private *dev) { return 0; }
-static inline void intel_opregion_register(struct drm_i915_private *dev_priv) { }
-static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) { }
-static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv)
-{
-}
-static inline int
-intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable)
-{
- return 0;
-}
-static inline int
-intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state)
-{
- return 0;
-}
-static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev)
-{
- return -ENODEV;
-}
-#endif
-
/* intel_acpi.c */
#ifdef CONFIG_ACPI
extern void intel_register_dsm_handler(void);
@@ -4162,10 +3685,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
return (struct intel_device_info *)&dev_priv->info;
}
-const char *intel_platform_name(enum intel_platform platform);
-void intel_device_info_runtime_init(struct drm_i915_private *dev_priv);
-void intel_device_info_dump(struct drm_i915_private *dev_priv);
-
/* modesetting */
extern void intel_modeset_init_hw(struct drm_device *dev);
extern int intel_modeset_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a7f5579a7a5..ba9f67c256f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2596,7 +2596,7 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
}
err = obj->ops->get_pages(obj);
- GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages));
+ GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
return err;
}
@@ -3089,7 +3089,12 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
void i915_gem_reset_engine(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- engine->irq_posted = 0;
+ /*
+ * Make sure this write is visible before we re-enable the interrupt
+ * handlers on another CPU, as tasklet_enable() resolves to just
+ * a compiler barrier which is insufficient for our purpose here.
+ */
+ smp_store_mb(engine->irq_posted, 0);
if (request)
request = i915_gem_reset_request(engine, request);
@@ -3119,6 +3124,25 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
ctx = fetch_and_zero(&engine->last_retired_context);
if (ctx)
engine->context_unpin(engine, ctx);
+
+ /*
+ * Ostensibly, we always want a context loaded for powersaving,
+ * so if the engine is idle after the reset, send a request
+ * to load our scratch kernel_context.
+ *
+ * More mysteriously, if we leave the engine idle after a reset,
+ * the next userspace batch may hang, with what appears to be
+ * an incoherent read by the CS (presumably stale TLB). An
+ * empty request appears sufficient to paper over the glitch.
+ */
+ if (list_empty(&engine->timeline->requests)) {
+ struct drm_i915_gem_request *rq;
+
+ rq = i915_gem_request_alloc(engine,
+ dev_priv->kernel_context);
+ if (!IS_ERR(rq))
+ __i915_add_request(rq, false);
+ }
}
i915_gem_restore_fences(dev_priv);
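The smp_store_mb() used in the reset path above is a store plus a full memory barrier, so the cleared irq_posted flag is globally visible before interrupt handling resumes on other CPUs. As a hedged userspace analogue (C11 atomics rather than the kernel primitive), the same ordering comes from a sequentially consistent store:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint irq_posted;

/* Userspace analogue of smp_store_mb(engine->irq_posted, 0): a store
 * followed by a full barrier. A C11 seq_cst store gives equivalent
 * ordering; a plain (relaxed) store would not. */
static void clear_irq_posted(void)
{
	atomic_store_explicit(&irq_posted, 0, memory_order_seq_cst);
}

int main(void)
{
	atomic_store(&irq_posted, 1);	/* handler ran, flag set */
	clear_irq_posted();		/* reset path clears it */
	printf("irq_posted = %u\n", atomic_load(&irq_posted));
	return 0;
}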
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index ee83ec838ee7..a1d6956734f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -27,6 +27,7 @@
#include "i915_drv.h"
#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
/* convert swiotlb segment size into sensible units (pages)! */
#define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT)
@@ -95,7 +96,8 @@ create_st:
struct page *page;
do {
- page = alloc_pages(gfp | (order ? QUIET : 0), order);
+ page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
+ order);
if (page)
break;
if (!order--)
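The QUIET/MAYFAIL tweak above keeps high-order attempts quiet and lets the final smallest-order attempt fail gracefully instead of invoking the OOM killer; the loop then retries at ever smaller orders. A minimal userspace sketch of that highest-order-first fallback (malloc stands in for alloc_pages; the gfp flags themselves are kernel-only):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* Try the largest chunk first, halving the order on each failure,
 * mirroring the get_pages loop above (userspace analogue). */
static void *alloc_best_effort(unsigned int max_order, unsigned int *got_order)
{
	unsigned int order = max_order;

	for (;;) {
		void *p = malloc(PAGE_SIZE << order);

		if (p) {
			*got_order = order;
			return p;
		}
		if (!order--)		/* smallest order also failed */
			return NULL;
	}
}

int main(void)
{
	unsigned int order;
	void *p = alloc_best_effort(10, &order);	/* up to 4 MiB */

	if (p) {
		printf("got order %u (%lu bytes)\n", order, PAGE_SIZE << order);
		free(p);
	}
	return 0;
}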
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 59f023bb7015..d575109f7a7f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -479,6 +479,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
/* Transfer from per-context onto the global per-engine timeline */
timeline = engine->timeline;
GEM_BUG_ON(timeline == request->timeline);
+ GEM_BUG_ON(request->global_seqno);
seqno = timeline_get_seqno(timeline);
GEM_BUG_ON(!seqno);
@@ -525,6 +526,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
/* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order.
*/
+ GEM_BUG_ON(!request->global_seqno);
GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
engine->timeline->seqno--;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index aba50aa613f1..944059322daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -416,6 +416,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
int n;
err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+ err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
err_printf(m, " START: 0x%08x\n", ee->start);
err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n",
@@ -564,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
const struct intel_device_info *info)
{
-#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
- DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
-}
+ struct drm_printer p = i915_error_printer(m);
-static __always_inline void err_print_param(struct drm_i915_error_state_buf *m,
- const char *name,
- const char *type,
- const void *x)
-{
- if (!__builtin_strcmp(type, "bool"))
- err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
- else if (!__builtin_strcmp(type, "int"))
- err_printf(m, "i915.%s=%d\n", name, *(const int *)x);
- else if (!__builtin_strcmp(type, "unsigned int"))
- err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
- else if (!__builtin_strcmp(type, "char *"))
- err_printf(m, "i915.%s=%s\n", name, *(const char **)x);
- else
- BUILD_BUG();
+ intel_device_info_dump_flags(info, &p);
}
static void err_print_params(struct drm_i915_error_state_buf *m,
- const struct i915_params *p)
+ const struct i915_params *params)
{
-#define PRINT(T, x, ...) err_print_param(m, #x, #T, &p->x);
- I915_PARAMS_FOR_EACH(PRINT);
-#undef PRINT
+ struct drm_printer p = i915_error_printer(m);
+
+ i915_params_dump(params, &p);
}
static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@ -1256,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
ee->hws = I915_READ(mmio);
}
+ ee->idle = intel_engine_is_idle(engine);
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->hangcheck_action = engine->hangcheck.action;
ee->hangcheck_stalled = engine->hangcheck.stalled;
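Both hunks above retire the hand-rolled err_print_* helpers in favour of a drm_printer constructed from the error-state buffer, so one dump routine serves capture and debugfs alike. A hedged userspace sketch of the underlying function-pointer printer pattern (names invented for illustration, not the DRM API):

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

/* Illustrative printer abstraction in the spirit of drm_printer:
 * one dump routine, multiple output sinks. */
struct printer {
	void (*emit)(struct printer *p, const char *line);
	char buf[256];
	size_t len;
};

static void pr_printf(struct printer *p, const char *fmt, ...)
{
	char line[128];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(line, sizeof(line), fmt, ap);
	va_end(ap);
	p->emit(p, line);
}

static void emit_stdout(struct printer *p, const char *line)
{
	(void)p;
	fputs(line, stdout);
}

static void emit_buffer(struct printer *p, const char *line)
{
	p->len += snprintf(p->buf + p->len, sizeof(p->buf) - p->len, "%s", line);
}

/* One dumper, reused for both sinks - the point of the refactor above. */
static void dump_caps(struct printer *p)
{
	pr_printf(p, "has_llc: %s\n", "yes");
	pr_printf(p, "has_psr: %s\n", "no");
}

int main(void)
{
	struct printer console = { .emit = emit_stdout };
	struct printer capture = { .emit = emit_buffer };

	dump_caps(&console);
	dump_caps(&capture);
	printf("captured %zu bytes\n", capture.len);
	return 0;
}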
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 49a079494b68..79f8ec756362 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -96,6 +96,11 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
{
- if (static_cpu_has(X86_FEATURE_XMM4_1))
+ /*
+ * Some hypervisors (e.g. KVM) don't support VEX-prefix instruction
+ * emulation, so don't enable movntdqa in hypervisor guests.
+ */
+ if (static_cpu_has(X86_FEATURE_XMM4_1) &&
+ !boot_cpu_has(X86_FEATURE_HYPERVISOR))
static_branch_enable(&has_movntdqa);
}
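X86_FEATURE_HYPERVISOR reflects CPUID leaf 1, ECX bit 31, which hypervisors set to announce themselves; SSE4.1 (the movntdqa path) is bit 19 of the same register. A small userspace check of both bits (GCC/Clang on x86 only; a sketch, not the kernel's cpufeature plumbing):

#include <stdbool.h>
#include <stdio.h>
#include <cpuid.h>	/* GCC/Clang helper for the CPUID instruction */

/* CPUID leaf 1, ECX bit 31: "hypervisor present" - the bit behind
 * X86_FEATURE_HYPERVISOR. SSE4.1 is ECX bit 19 of the same leaf. */
static bool running_in_guest(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	return ecx & (1u << 31);
}

static bool has_sse4_1(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	return ecx & (1u << 19);
}

int main(void)
{
	/* Mirror the gate above: only use the WC-read fast path on bare metal. */
	printf("movntdqa fast path: %s\n",
	       has_sse4_1() && !running_in_guest() ? "enabled" : "disabled");
	return 0;
}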
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 8dfea0320c2f..b5f3eb4fa8a3 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -22,6 +22,8 @@
* IN THE SOFTWARE.
*/
+#include <drm/drm_print.h>
+
#include "i915_params.h"
#include "i915_drv.h"
@@ -172,3 +174,34 @@ i915_param_named(enable_dpcd_backlight, bool, 0600,
i915_param_named(enable_gvt, bool, 0400,
"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
+
+static __always_inline void _print_param(struct drm_printer *p,
+ const char *name,
+ const char *type,
+ const void *x)
+{
+ if (!__builtin_strcmp(type, "bool"))
+ drm_printf(p, "i915.%s=%s\n", name, yesno(*(const bool *)x));
+ else if (!__builtin_strcmp(type, "int"))
+ drm_printf(p, "i915.%s=%d\n", name, *(const int *)x);
+ else if (!__builtin_strcmp(type, "unsigned int"))
+ drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x);
+ else if (!__builtin_strcmp(type, "char *"))
+ drm_printf(p, "i915.%s=%s\n", name, *(const char **)x);
+ else
+ BUILD_BUG();
+}
+
+/**
+ * i915_params_dump - dump i915 modparams
+ * @params: i915 modparams
+ * @p: the &drm_printer
+ *
+ * Pretty printer for i915 modparams.
+ */
+void i915_params_dump(const struct i915_params *params, struct drm_printer *p)
+{
+#define PRINT(T, x, ...) _print_param(p, #x, #T, &params->x);
+ I915_PARAMS_FOR_EACH(PRINT);
+#undef PRINT
+}
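_print_param() works because the PRINT macro stringifies each parameter's type, and __builtin_strcmp() on two string literals folds to a compile-time constant, leaving only the matching branch (with BUILD_BUG() catching unhandled types). A standalone sketch of that X-macro plus stringified-type dispatch, using a toy parameter block rather than the real i915_params:

#include <stdio.h>

/* Toy parameter block and X-macro list, standing in for i915_params /
 * I915_PARAMS_FOR_EACH - illustrative only. */
struct params {
	int modeset;
	unsigned int mmio_debug;
	_Bool enable_guc;
};

#define PARAMS_FOR_EACH(func) \
	func(int, modeset) \
	func(unsigned int, mmio_debug) \
	func(_Bool, enable_guc)

/* With optimization, __builtin_strcmp() on string literals folds to a
 * constant, so only the matching branch survives in the generated code. */
static inline void print_param(const char *name, const char *type,
			       const void *x)
{
	if (!__builtin_strcmp(type, "_Bool"))
		printf("%s=%s\n", name, *(const _Bool *)x ? "yes" : "no");
	else if (!__builtin_strcmp(type, "int"))
		printf("%s=%d\n", name, *(const int *)x);
	else if (!__builtin_strcmp(type, "unsigned int"))
		printf("%s=%u\n", name, *(const unsigned int *)x);
}

int main(void)
{
	const struct params p = { .modeset = 1, .mmio_debug = 0, .enable_guc = 1 };

#define PRINT(T, x) print_param(#x, #T, &p.x);
	PARAMS_FOR_EACH(PRINT)
#undef PRINT
	return 0;
}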
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 792ce26d7449..c96360398072 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -28,6 +28,8 @@
#include <linux/bitops.h>
#include <linux/cache.h> /* for __read_mostly */
+struct drm_printer;
+
#define ENABLE_GUC_SUBMISSION BIT(0)
#define ENABLE_GUC_LOAD_HUC BIT(1)
@@ -77,5 +79,7 @@ struct i915_params {
extern struct i915_params i915_modparams __read_mostly;
+void i915_params_dump(const struct i915_params *params, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index fa67d3dde20e..36d48422b475 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -633,6 +633,8 @@ static const struct pci_device_id pciidlist[] = {
INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info),
INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info),
+ INTEL_CFL_U_GT1_IDS(&intel_coffeelake_gt1_info),
+ INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info),
INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info),
INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info),
INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info),
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 09bf043c1c2e..41285bec8fc0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3278,6 +3278,7 @@ enum i915_power_well_id {
# define AUDUNIT_CLOCK_GATE_DISABLE (1 << 26) /* 965 */
# define DPUNIT_A_CLOCK_GATE_DISABLE (1 << 25) /* 965 */
# define DPCUNIT_CLOCK_GATE_DISABLE (1 << 24) /* 965 */
+# define PNV_GMBUSUNIT_CLOCK_GATE_DISABLE (1 << 24) /* pnv */
# define TVRUNIT_CLOCK_GATE_DISABLE (1 << 23) /* 915-945 */
# define TVCUNIT_CLOCK_GATE_DISABLE (1 << 22) /* 915-945 */
# define TVFUNIT_CLOCK_GATE_DISABLE (1 << 21) /* 915-945 */
@@ -3858,6 +3859,9 @@ enum {
#define PWM2_GATING_DIS (1 << 14)
#define PWM1_GATING_DIS (1 << 13)
+#define GEN9_CLKGATE_DIS_4 _MMIO(0x4653C)
+#define BXT_GMBUS_GATING_DIS (1 << 14)
+
#define _CLKGATE_DIS_PSL_A 0x46520
#define _CLKGATE_DIS_PSL_B 0x46524
#define _CLKGATE_DIS_PSL_C 0x46528
@@ -3875,6 +3879,9 @@ enum {
#define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7)
+#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
+#define VFUNIT_CLKGATE_DIS (1 << 20)
+
/*
* Display engine regs
*/
@@ -6329,6 +6336,7 @@ enum {
#define PLANE_CTL_TILED_X ( 1 << 10)
#define PLANE_CTL_TILED_Y ( 4 << 10)
#define PLANE_CTL_TILED_YF ( 5 << 10)
+#define PLANE_CTL_FLIP_HORIZONTAL ( 1 << 8)
#define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */
#define PLANE_CTL_ALPHA_DISABLE ( 0 << 4)
#define PLANE_CTL_ALPHA_SW_PREMULTIPLY ( 2 << 4)
@@ -7552,6 +7560,7 @@ enum {
#define FDI_RX_CHICKEN(pipe) _MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)
#define SOUTH_DSPCLK_GATE_D _MMIO(0xc2020)
+#define PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1<<31)
#define PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30)
#define PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29)
#define PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14)
@@ -8142,6 +8151,7 @@ enum {
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8)
#define STALL_DOP_GATING_DISABLE (1<<5)
#define THROTTLE_12_5 (7<<2)
+#define DISABLE_EARLY_EOT (1<<1)
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 4e76768ffa95..e1169c02eb2b 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -616,6 +616,7 @@ TRACE_EVENT(i915_gem_request_queue,
TP_STRUCT__entry(
__field(u32, dev)
+ __field(u32, hw_id)
__field(u32, ring)
__field(u32, ctx)
__field(u32, seqno)
@@ -624,15 +625,16 @@ TRACE_EVENT(i915_gem_request_queue,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
+ __entry->hw_id = req->ctx->hw_id;
__entry->ring = req->engine->id;
__entry->ctx = req->fence.context;
__entry->seqno = req->fence.seqno;
__entry->flags = flags;
),
- TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x",
- __entry->dev, __entry->ring, __entry->ctx, __entry->seqno,
- __entry->flags)
+ TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x",
+ __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx,
+ __entry->seqno, __entry->flags)
);
DECLARE_EVENT_CLASS(i915_gem_request,
@@ -641,23 +643,25 @@ DECLARE_EVENT_CLASS(i915_gem_request,
TP_STRUCT__entry(
__field(u32, dev)
- __field(u32, ctx)
+ __field(u32, hw_id)
__field(u32, ring)
+ __field(u32, ctx)
__field(u32, seqno)
__field(u32, global)
),
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
+ __entry->hw_id = req->ctx->hw_id;
__entry->ring = req->engine->id;
__entry->ctx = req->fence.context;
__entry->seqno = req->fence.seqno;
__entry->global = req->global_seqno;
),
- TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u",
- __entry->dev, __entry->ring, __entry->ctx, __entry->seqno,
- __entry->global)
+ TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u",
+ __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx,
+ __entry->seqno, __entry->global)
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
@@ -683,15 +687,17 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw,
TP_STRUCT__entry(
__field(u32, dev)
+ __field(u32, hw_id)
__field(u32, ring)
+ __field(u32, ctx)
__field(u32, seqno)
__field(u32, global_seqno)
- __field(u32, ctx)
__field(u32, port)
),
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
+ __entry->hw_id = req->ctx->hw_id;
__entry->ring = req->engine->id;
__entry->ctx = req->fence.context;
__entry->seqno = req->fence.seqno;
@@ -699,10 +705,10 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw,
__entry->port = port;
),
- TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u",
- __entry->dev, __entry->ring, __entry->ctx,
- __entry->seqno, __entry->global_seqno,
- __entry->port)
+ TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u",
+ __entry->dev, __entry->hw_id, __entry->ring,
+ __entry->ctx, __entry->seqno,
+ __entry->global_seqno, __entry->port)
);
DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in,
@@ -772,6 +778,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
TP_STRUCT__entry(
__field(u32, dev)
+ __field(u32, hw_id)
__field(u32, ring)
__field(u32, ctx)
__field(u32, seqno)
@@ -787,6 +794,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
*/
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
+ __entry->hw_id = req->ctx->hw_id;
__entry->ring = req->engine->id;
__entry->ctx = req->fence.context;
__entry->seqno = req->fence.seqno;
@@ -794,10 +802,10 @@ TRACE_EVENT(i915_gem_request_wait_begin,
__entry->flags = flags;
),
- TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x",
- __entry->dev, __entry->ring, __entry->ctx, __entry->seqno,
- __entry->global, !!(__entry->flags & I915_WAIT_LOCKED),
- __entry->flags)
+ TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x",
+ __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx,
+ __entry->seqno, __entry->global,
+ !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags)
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end,
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 8d07764887ec..51dbfe5bb418 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -140,4 +140,19 @@ static inline void drain_delayed_work(struct delayed_work *dw)
} while (delayed_work_pending(dw));
}
+static inline const char *yesno(bool v)
+{
+ return v ? "yes" : "no";
+}
+
+static inline const char *onoff(bool v)
+{
+ return v ? "on" : "off";
+}
+
+static inline const char *enableddisabled(bool v)
+{
+ return v ? "enabled" : "disabled";
+}
+
#endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 369f780588fb..f51645a08dca 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2095,6 +2095,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
if (WARN_ON(!pll))
return;
+ mutex_lock(&dev_priv->dpll_lock);
+
if (IS_CANNONLAKE(dev_priv)) {
/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
val = I915_READ(DPCLKA_CFGCR0);
@@ -2115,7 +2117,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
val = I915_READ(DPLL_CTRL2);
val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
- DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
+ DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) |
DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
@@ -2124,6 +2126,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
} else if (INTEL_INFO(dev_priv)->gen < 9) {
I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
}
+
+ mutex_unlock(&dev_priv->dpll_lock);
}
static void intel_ddi_clk_disable(struct intel_encoder *encoder)
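The new dpll_lock critical section above exists because mapping a DPLL to a DDI is a read-modify-write of a shared register; two encoders enabling concurrently could interleave their read and write and silently drop one port's update. A userspace sketch of that lost-update hazard and the mutex fix (the register is modeled as a plain word):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Shared "register" mapping clocks to ports, like DPLL_CTRL2 above. */
static uint32_t dpll_ctrl;
static pthread_mutex_t dpll_lock = PTHREAD_MUTEX_INITIALIZER;

#define PORT_CLK_SEL(port, pll)	((uint32_t)(pll) << ((port) * 4))
#define PORT_CLK_MASK(port)	(0xfu << ((port) * 4))

static void clk_select(int port, int pll)
{
	pthread_mutex_lock(&dpll_lock);
	/* Without the lock, two threads can both read the old value and
	 * one port's field update is lost on writeback. */
	uint32_t val = dpll_ctrl;		/* I915_READ */
	val &= ~PORT_CLK_MASK(port);
	val |= PORT_CLK_SEL(port, pll);
	dpll_ctrl = val;			/* I915_WRITE */
	pthread_mutex_unlock(&dpll_lock);
}

static void *enable_encoder(void *arg)
{
	int port = (int)(intptr_t)arg;

	clk_select(port, port + 1);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, enable_encoder, (void *)(intptr_t)0);
	pthread_create(&b, NULL, enable_encoder, (void *)(intptr_t)1);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("DPLL_CTRL = 0x%08x\n", dpll_ctrl);
	return 0;
}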
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index f478be3ae0ba..d28592e43512 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -22,6 +22,9 @@
*
*/
+#include <drm/drm_print.h>
+
+#include "intel_device_info.h"
#include "i915_drv.h"
#define PLATFORM_NAME(x) [INTEL_##x] = #x
@@ -67,21 +70,55 @@ const char *intel_platform_name(enum intel_platform platform)
return platform_names[platform];
}
-void intel_device_info_dump(struct drm_i915_private *dev_priv)
+void intel_device_info_dump_flags(const struct intel_device_info *info,
+ struct drm_printer *p)
{
- const struct intel_device_info *info = &dev_priv->info;
-
- DRM_DEBUG_DRIVER("i915 device info: platform=%s gen=%i pciid=0x%04x rev=0x%02x",
- intel_platform_name(info->platform),
- info->gen,
- dev_priv->drm.pdev->device,
- dev_priv->drm.pdev->revision);
-#define PRINT_FLAG(name) \
- DRM_DEBUG_DRIVER("i915 device info: " #name ": %s", yesno(info->name))
+#define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name));
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
#undef PRINT_FLAG
}
+static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
+{
+ drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
+ drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
+ drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
+ drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
+ drm_printf(p, "subslice per slice: %u\n",
+ hweight8(sseu->subslice_mask));
+ drm_printf(p, "EU total: %u\n", sseu->eu_total);
+ drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
+ drm_printf(p, "has slice power gating: %s\n",
+ yesno(sseu->has_slice_pg));
+ drm_printf(p, "has subslice power gating: %s\n",
+ yesno(sseu->has_subslice_pg));
+ drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
+}
+
+void intel_device_info_dump_runtime(const struct intel_device_info *info,
+ struct drm_printer *p)
+{
+ sseu_dump(&info->sseu, p);
+
+ drm_printf(p, "CS timestamp frequency: %u kHz\n",
+ info->cs_timestamp_frequency_khz);
+}
+
+void intel_device_info_dump(const struct intel_device_info *info,
+ struct drm_printer *p)
+{
+ struct drm_i915_private *dev_priv =
+ container_of(info, struct drm_i915_private, info);
+
+ drm_printf(p, "pciid=0x%04x rev=0x%02x platform=%s gen=%i\n",
+ INTEL_DEVID(dev_priv),
+ INTEL_REVID(dev_priv),
+ intel_platform_name(info->platform),
+ info->gen);
+
+ intel_device_info_dump_flags(info, p);
+}
+
static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
@@ -420,7 +457,10 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
return 0;
}
-/*
+/**
+ * intel_device_info_runtime_init - initialize runtime info
+ * @info: intel device info struct
+ *
* Determine various intel_device_info fields at runtime.
*
* Use it when either:
@@ -433,9 +473,10 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
* - after the PCH has been detected,
* - before the first usage of the fields it can tweak.
*/
-void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
+void intel_device_info_runtime_init(struct intel_device_info *info)
{
- struct intel_device_info *info = mkwrite_device_info(dev_priv);
+ struct drm_i915_private *dev_priv =
+ container_of(info, struct drm_i915_private, info);
enum pipe pipe;
if (INTEL_GEN(dev_priv) >= 10) {
@@ -543,22 +584,4 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
/* Initialize command stream timestamp frequency */
info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
-
- DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
- DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
- DRM_DEBUG_DRIVER("subslice total: %u\n",
- sseu_subslice_total(&info->sseu));
- DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
- DRM_DEBUG_DRIVER("subslice per slice: %u\n",
- hweight8(info->sseu.subslice_mask));
- DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
- DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
- DRM_DEBUG_DRIVER("has slice power gating: %s\n",
- info->sseu.has_slice_pg ? "y" : "n");
- DRM_DEBUG_DRIVER("has subslice power gating: %s\n",
- info->sseu.has_subslice_pg ? "y" : "n");
- DRM_DEBUG_DRIVER("has EU power gating: %s\n",
- info->sseu.has_eu_pg ? "y" : "n");
- DRM_DEBUG_DRIVER("CS timestamp frequency: %u kHz\n",
- info->cs_timestamp_frequency_khz);
}
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
new file mode 100644
index 000000000000..49cb27bd04c1
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_DEVICE_INFO_H_
+#define _INTEL_DEVICE_INFO_H_
+
+#include "intel_display.h"
+
+struct drm_printer;
+struct drm_i915_private;
+
+/* Keep in gen based order, and chronological order within a gen */
+enum intel_platform {
+ INTEL_PLATFORM_UNINITIALIZED = 0,
+ /* gen2 */
+ INTEL_I830,
+ INTEL_I845G,
+ INTEL_I85X,
+ INTEL_I865G,
+ /* gen3 */
+ INTEL_I915G,
+ INTEL_I915GM,
+ INTEL_I945G,
+ INTEL_I945GM,
+ INTEL_G33,
+ INTEL_PINEVIEW,
+ /* gen4 */
+ INTEL_I965G,
+ INTEL_I965GM,
+ INTEL_G45,
+ INTEL_GM45,
+ /* gen5 */
+ INTEL_IRONLAKE,
+ /* gen6 */
+ INTEL_SANDYBRIDGE,
+ /* gen7 */
+ INTEL_IVYBRIDGE,
+ INTEL_VALLEYVIEW,
+ INTEL_HASWELL,
+ /* gen8 */
+ INTEL_BROADWELL,
+ INTEL_CHERRYVIEW,
+ /* gen9 */
+ INTEL_SKYLAKE,
+ INTEL_BROXTON,
+ INTEL_KABYLAKE,
+ INTEL_GEMINILAKE,
+ INTEL_COFFEELAKE,
+ /* gen10 */
+ INTEL_CANNONLAKE,
+ INTEL_MAX_PLATFORMS
+};
+
+#define DEV_INFO_FOR_EACH_FLAG(func) \
+ func(is_mobile); \
+ func(is_lp); \
+ func(is_alpha_support); \
+ /* Keep has_* in alphabetical order */ \
+ func(has_64bit_reloc); \
+ func(has_aliasing_ppgtt); \
+ func(has_csr); \
+ func(has_ddi); \
+ func(has_dp_mst); \
+ func(has_reset_engine); \
+ func(has_fbc); \
+ func(has_fpga_dbg); \
+ func(has_full_ppgtt); \
+ func(has_full_48bit_ppgtt); \
+ func(has_gmch_display); \
+ func(has_guc); \
+ func(has_guc_ct); \
+ func(has_hotplug); \
+ func(has_l3_dpf); \
+ func(has_llc); \
+ func(has_logical_ring_contexts); \
+ func(has_logical_ring_preemption); \
+ func(has_overlay); \
+ func(has_pooled_eu); \
+ func(has_psr); \
+ func(has_rc6); \
+ func(has_rc6p); \
+ func(has_resource_streamer); \
+ func(has_runtime_pm); \
+ func(has_snoop); \
+ func(unfenced_needs_alignment); \
+ func(cursor_needs_physical); \
+ func(hws_needs_physical); \
+ func(overlay_needs_physical); \
+ func(supports_tv); \
+ func(has_ipc);
+
+struct sseu_dev_info {
+ u8 slice_mask;
+ u8 subslice_mask;
+ u8 eu_total;
+ u8 eu_per_subslice;
+ u8 min_eu_in_pool;
+ /* For each slice, a bitfield of the subslices that have 7 EUs */
+ u8 subslice_7eu[3];
+ u8 has_slice_pg:1;
+ u8 has_subslice_pg:1;
+ u8 has_eu_pg:1;
+};
+
+struct intel_device_info {
+ u16 device_id;
+ u16 gen_mask;
+
+ u8 gen;
+ u8 gt; /* GT number, 0 if undefined */
+ u8 num_rings;
+ u8 ring_mask; /* Rings supported by the HW */
+
+ enum intel_platform platform;
+ u32 platform_mask;
+
+ u32 display_mmio_offset;
+
+ u8 num_pipes;
+ u8 num_sprites[I915_MAX_PIPES];
+ u8 num_scalers[I915_MAX_PIPES];
+
+ unsigned int page_sizes; /* page sizes supported by the HW */
+
+#define DEFINE_FLAG(name) u8 name:1
+ DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
+#undef DEFINE_FLAG
+ u16 ddb_size; /* in blocks */
+
+ /* Register offsets for the various display pipes and transcoders */
+ int pipe_offsets[I915_MAX_TRANSCODERS];
+ int trans_offsets[I915_MAX_TRANSCODERS];
+ int palette_offsets[I915_MAX_PIPES];
+ int cursor_offsets[I915_MAX_PIPES];
+
+ /* Slice/subslice/EU info */
+ struct sseu_dev_info sseu;
+
+ u32 cs_timestamp_frequency_khz;
+
+ struct color_luts {
+ u16 degamma_lut_size;
+ u16 gamma_lut_size;
+ } color;
+};
+
+static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+ return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+}
+
+const char *intel_platform_name(enum intel_platform platform);
+
+void intel_device_info_runtime_init(struct intel_device_info *info);
+void intel_device_info_dump(const struct intel_device_info *info,
+ struct drm_printer *p);
+void intel_device_info_dump_flags(const struct intel_device_info *info,
+ struct drm_printer *p);
+void intel_device_info_dump_runtime(const struct intel_device_info *info,
+ struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index efa6c6d19664..0cd355978ab4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3073,6 +3073,12 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
unsigned int rotation = plane_state->base.rotation;
int ret;
+ if (rotation & DRM_MODE_REFLECT_X &&
+ fb->modifier == DRM_FORMAT_MOD_LINEAR) {
+ DRM_DEBUG_KMS("horizontal flip is not supported with linear surface formats\n");
+ return -EINVAL;
+ }
+
if (!plane_state->base.visible)
return 0;
@@ -3453,9 +3459,9 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
return 0;
}
-static u32 skl_plane_ctl_rotation(unsigned int rotation)
+static u32 skl_plane_ctl_rotate(unsigned int rotate)
{
- switch (rotation) {
+ switch (rotate) {
case DRM_MODE_ROTATE_0:
break;
/*
@@ -3469,7 +3475,22 @@ static u32 skl_plane_ctl_rotation(unsigned int rotation)
case DRM_MODE_ROTATE_270:
return PLANE_CTL_ROTATE_90;
default:
- MISSING_CASE(rotation);
+ MISSING_CASE(rotate);
+ }
+
+ return 0;
+}
+
+static u32 cnl_plane_ctl_flip(unsigned int reflect)
+{
+ switch (reflect) {
+ case 0:
+ break;
+ case DRM_MODE_REFLECT_X:
+ return PLANE_CTL_FLIP_HORIZONTAL;
+ case DRM_MODE_REFLECT_Y:
+ default:
+ MISSING_CASE(reflect);
}
return 0;
@@ -3497,7 +3518,11 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
plane_ctl |= skl_plane_ctl_format(fb->format->format);
plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
- plane_ctl |= skl_plane_ctl_rotation(rotation);
+ plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK);
+
+ if (INTEL_GEN(dev_priv) >= 10)
+ plane_ctl |= cnl_plane_ctl_flip(rotation &
+ DRM_MODE_REFLECT_MASK);
if (key->flags & I915_SET_COLORKEY_DESTINATION)
plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION;
@@ -9694,111 +9719,27 @@ err:
return ERR_PTR(ret);
}
-static u32
-intel_framebuffer_pitch_for_width(int width, int bpp)
-{
- u32 pitch = DIV_ROUND_UP(width * bpp, 8);
- return ALIGN(pitch, 64);
-}
-
-static u32
-intel_framebuffer_size_for_mode(const struct drm_display_mode *mode, int bpp)
-{
- u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp);
- return PAGE_ALIGN(pitch * mode->vdisplay);
-}
-
-static struct drm_framebuffer *
-intel_framebuffer_create_for_mode(struct drm_device *dev,
- const struct drm_display_mode *mode,
- int depth, int bpp)
-{
- struct drm_framebuffer *fb;
- struct drm_i915_gem_object *obj;
- struct drm_mode_fb_cmd2 mode_cmd = { 0 };
-
- obj = i915_gem_object_create(to_i915(dev),
- intel_framebuffer_size_for_mode(mode, bpp));
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- mode_cmd.width = mode->hdisplay;
- mode_cmd.height = mode->vdisplay;
- mode_cmd.pitches[0] = intel_framebuffer_pitch_for_width(mode_cmd.width,
- bpp);
- mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
-
- fb = intel_framebuffer_create(obj, &mode_cmd);
- if (IS_ERR(fb))
- i915_gem_object_put(obj);
-
- return fb;
-}
-
-static struct drm_framebuffer *
-mode_fits_in_fbdev(struct drm_device *dev,
- const struct drm_display_mode *mode)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_i915_gem_object *obj;
- struct drm_framebuffer *fb;
-
- if (!dev_priv->fbdev)
- return NULL;
-
- if (!dev_priv->fbdev->fb)
- return NULL;
-
- obj = dev_priv->fbdev->fb->obj;
- BUG_ON(!obj);
-
- fb = &dev_priv->fbdev->fb->base;
- if (fb->pitches[0] < intel_framebuffer_pitch_for_width(mode->hdisplay,
- fb->format->cpp[0] * 8))
- return NULL;
-
- if (obj->base.size < mode->vdisplay * fb->pitches[0])
- return NULL;
-
- drm_framebuffer_get(fb);
- return fb;
-#else
- return NULL;
-#endif
-}
-
-static int intel_modeset_setup_plane_state(struct drm_atomic_state *state,
- struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_framebuffer *fb,
- int x, int y)
+static int intel_modeset_disable_planes(struct drm_atomic_state *state,
+ struct drm_crtc *crtc)
{
+ struct drm_plane *plane;
struct drm_plane_state *plane_state;
- int hdisplay, vdisplay;
- int ret;
-
- plane_state = drm_atomic_get_plane_state(state, crtc->primary);
- if (IS_ERR(plane_state))
- return PTR_ERR(plane_state);
-
- if (mode)
- drm_mode_get_hv_timing(mode, &hdisplay, &vdisplay);
- else
- hdisplay = vdisplay = 0;
+ int ret, i;
- ret = drm_atomic_set_crtc_for_plane(plane_state, fb ? crtc : NULL);
+ ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
return ret;
- drm_atomic_set_fb_for_plane(plane_state, fb);
- plane_state->crtc_x = 0;
- plane_state->crtc_y = 0;
- plane_state->crtc_w = hdisplay;
- plane_state->crtc_h = vdisplay;
- plane_state->src_x = x << 16;
- plane_state->src_y = y << 16;
- plane_state->src_w = hdisplay << 16;
- plane_state->src_h = vdisplay << 16;
+
+ for_each_new_plane_in_state(state, plane, plane_state, i) {
+ if (plane_state->crtc != crtc)
+ continue;
+
+ ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
+ if (ret)
+ return ret;
+
+ drm_atomic_set_fb_for_plane(plane_state, NULL);
+ }
return 0;
}
@@ -9816,7 +9757,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
struct drm_crtc *crtc = NULL;
struct drm_device *dev = encoder->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_framebuffer *fb;
struct drm_mode_config *config = &dev->mode_config;
struct drm_atomic_state *state = NULL, *restore_state = NULL;
struct drm_connector_state *connector_state;
@@ -9884,10 +9824,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
found:
intel_crtc = to_intel_crtc(crtc);
- ret = drm_modeset_lock(&crtc->primary->mutex, ctx);
- if (ret)
- goto fail;
-
state = drm_atomic_state_alloc(dev);
restore_state = drm_atomic_state_alloc(dev);
if (!state || !restore_state) {
@@ -9919,39 +9855,17 @@ found:
if (!mode)
mode = &load_detect_mode;
- /* We need a framebuffer large enough to accommodate all accesses
- * that the plane may generate whilst we perform load detection.
- * We can not rely on the fbcon either being present (we get called
- * during its initialisation to detect all boot displays, or it may
- * not even exist) or that it is large enough to satisfy the
- * requested mode.
- */
- fb = mode_fits_in_fbdev(dev, mode);
- if (fb == NULL) {
- DRM_DEBUG_KMS("creating tmp fb for load-detection\n");
- fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32);
- } else
- DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
- if (IS_ERR(fb)) {
- DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
- ret = PTR_ERR(fb);
- goto fail;
- }
-
- ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
- drm_framebuffer_put(fb);
+ ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
if (ret)
goto fail;
- ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
+ ret = intel_modeset_disable_planes(state, crtc);
if (ret)
goto fail;
ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector));
if (!ret)
ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc));
- if (!ret)
- ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(restore_state, crtc->primary));
if (ret) {
DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret);
goto fail;
@@ -12569,11 +12483,15 @@ static int intel_atomic_commit(struct drm_device *dev,
INIT_WORK(&state->commit_work, intel_atomic_commit_work);
i915_sw_fence_commit(&intel_state->commit_ready);
- if (nonblock)
+ if (nonblock && intel_state->modeset) {
+ queue_work(dev_priv->modeset_wq, &state->commit_work);
+ } else if (nonblock) {
queue_work(system_unbound_wq, &state->commit_work);
- else
+ } else {
+ if (intel_state->modeset)
+ flush_workqueue(dev_priv->modeset_wq);
intel_atomic_commit_tail(state);
-
+ }
return 0;
}
@@ -13238,7 +13156,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
primary->check_plane = intel_check_primary_plane;
- if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
+ if (INTEL_GEN(dev_priv) >= 10) {
intel_primary_formats = skl_primary_formats;
num_formats = ARRAY_SIZE(skl_primary_formats);
modifiers = skl_format_modifiers_ccs;
@@ -13300,7 +13218,12 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
if (ret)
goto fail;
- if (INTEL_GEN(dev_priv) >= 9) {
+ if (INTEL_GEN(dev_priv) >= 10) {
+ supported_rotations =
+ DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
+ DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 |
+ DRM_MODE_REFLECT_X;
+ } else if (INTEL_GEN(dev_priv) >= 9) {
supported_rotations =
DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270;
@@ -14531,6 +14454,8 @@ int intel_modeset_init(struct drm_device *dev)
enum pipe pipe;
struct intel_crtc *crtc;
+ dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0);
+
drm_mode_config_init(dev);
dev->mode_config.min_width = 0;
@@ -15335,6 +15260,8 @@ void intel_modeset_cleanup(struct drm_device *dev)
intel_cleanup_gt_powersave(dev_priv);
intel_teardown_gmbus(dev_priv);
+
+ destroy_workqueue(dev_priv->modeset_wq);
}
void intel_connector_attach_encoder(struct intel_connector *connector,
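alloc_ordered_workqueue() creates a workqueue that runs at most one item at a time in strict queueing order, which is how the nonblocking modeset commits above stay serialized while blocking commits flush any pending ones first. A minimal module-style sketch of the create/queue/flush/destroy lifecycle (the work body is illustrative):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *modeset_wq;
static struct work_struct commit_work;

static void commit_fn(struct work_struct *work)
{
	pr_info("commit ran (serialized by the ordered wq)\n");
}

static int __init demo_init(void)
{
	/* Ordered: max one in-flight item, strict FIFO - like i915_modeset. */
	modeset_wq = alloc_ordered_workqueue("demo_modeset", 0);
	if (!modeset_wq)
		return -ENOMEM;

	INIT_WORK(&commit_work, commit_fn);
	queue_work(modeset_wq, &commit_work);	/* nonblocking commit path */
	flush_workqueue(modeset_wq);		/* blocking path waits first */
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(modeset_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");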
diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h
new file mode 100644
index 000000000000..a0d2b6169361
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_display.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright © 2006-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_DISPLAY_H_
+#define _INTEL_DISPLAY_H_
+
+enum pipe {
+ INVALID_PIPE = -1,
+
+ PIPE_A = 0,
+ PIPE_B,
+ PIPE_C,
+ _PIPE_EDP,
+
+ I915_MAX_PIPES = _PIPE_EDP
+};
+
+#define pipe_name(p) ((p) + 'A')
+
+enum transcoder {
+ TRANSCODER_A = 0,
+ TRANSCODER_B,
+ TRANSCODER_C,
+ TRANSCODER_EDP,
+ TRANSCODER_DSI_A,
+ TRANSCODER_DSI_C,
+
+ I915_MAX_TRANSCODERS
+};
+
+static inline const char *transcoder_name(enum transcoder transcoder)
+{
+ switch (transcoder) {
+ case TRANSCODER_A:
+ return "A";
+ case TRANSCODER_B:
+ return "B";
+ case TRANSCODER_C:
+ return "C";
+ case TRANSCODER_EDP:
+ return "EDP";
+ case TRANSCODER_DSI_A:
+ return "DSI A";
+ case TRANSCODER_DSI_C:
+ return "DSI C";
+ default:
+ return "<invalid>";
+ }
+}
+
+static inline bool transcoder_is_dsi(enum transcoder transcoder)
+{
+ return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C;
+}
+
+/*
+ * Global legacy plane identifier. Valid only for primary/sprite
+ * planes on pre-g4x, and only for primary planes on g4x-bdw.
+ */
+enum i9xx_plane_id {
+ PLANE_A,
+ PLANE_B,
+ PLANE_C,
+};
+
+#define plane_name(p) ((p) + 'A')
+#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A')
+
+/*
+ * Per-pipe plane identifier.
+ * I915_MAX_PLANES in the enum below is the maximum (across all platforms)
+ * number of planes per CRTC. Not all platforms really have this many planes,
+ * which means some arrays of size I915_MAX_PLANES may have unused entries
+ * between the topmost sprite plane and the cursor plane.
+ *
+ * This is expected to be passed to various register macros
+ * (e.g. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care.
+ */
+enum plane_id {
+ PLANE_PRIMARY,
+ PLANE_SPRITE0,
+ PLANE_SPRITE1,
+ PLANE_SPRITE2,
+ PLANE_CURSOR,
+
+ I915_MAX_PLANES,
+};
+
+#define for_each_plane_id_on_crtc(__crtc, __p) \
+ for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
+ for_each_if((__crtc)->plane_ids_mask & BIT(__p))
+
+enum port {
+ PORT_NONE = -1,
+
+ PORT_A = 0,
+ PORT_B,
+ PORT_C,
+ PORT_D,
+ PORT_E,
+
+ I915_MAX_PORTS
+};
+
+#define port_name(p) ((p) + 'A')
+
+enum dpio_channel {
+ DPIO_CH0,
+ DPIO_CH1
+};
+
+enum dpio_phy {
+ DPIO_PHY0,
+ DPIO_PHY1,
+ DPIO_PHY2,
+};
+
+#define I915_NUM_PHYS_VLV 2
+
+enum intel_display_power_domain {
+ POWER_DOMAIN_PIPE_A,
+ POWER_DOMAIN_PIPE_B,
+ POWER_DOMAIN_PIPE_C,
+ POWER_DOMAIN_PIPE_A_PANEL_FITTER,
+ POWER_DOMAIN_PIPE_B_PANEL_FITTER,
+ POWER_DOMAIN_PIPE_C_PANEL_FITTER,
+ POWER_DOMAIN_TRANSCODER_A,
+ POWER_DOMAIN_TRANSCODER_B,
+ POWER_DOMAIN_TRANSCODER_C,
+ POWER_DOMAIN_TRANSCODER_EDP,
+ POWER_DOMAIN_TRANSCODER_DSI_A,
+ POWER_DOMAIN_TRANSCODER_DSI_C,
+ POWER_DOMAIN_PORT_DDI_A_LANES,
+ POWER_DOMAIN_PORT_DDI_B_LANES,
+ POWER_DOMAIN_PORT_DDI_C_LANES,
+ POWER_DOMAIN_PORT_DDI_D_LANES,
+ POWER_DOMAIN_PORT_DDI_E_LANES,
+ POWER_DOMAIN_PORT_DDI_A_IO,
+ POWER_DOMAIN_PORT_DDI_B_IO,
+ POWER_DOMAIN_PORT_DDI_C_IO,
+ POWER_DOMAIN_PORT_DDI_D_IO,
+ POWER_DOMAIN_PORT_DDI_E_IO,
+ POWER_DOMAIN_PORT_DSI,
+ POWER_DOMAIN_PORT_CRT,
+ POWER_DOMAIN_PORT_OTHER,
+ POWER_DOMAIN_VGA,
+ POWER_DOMAIN_AUDIO,
+ POWER_DOMAIN_PLLS,
+ POWER_DOMAIN_AUX_A,
+ POWER_DOMAIN_AUX_B,
+ POWER_DOMAIN_AUX_C,
+ POWER_DOMAIN_AUX_D,
+ POWER_DOMAIN_GMBUS,
+ POWER_DOMAIN_MODESET,
+ POWER_DOMAIN_GT_IRQ,
+ POWER_DOMAIN_INIT,
+
+ POWER_DOMAIN_NUM,
+};
+
+#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
+#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \
+ ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER)
+#define POWER_DOMAIN_TRANSCODER(tran) \
+ ((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \
+ (tran) + POWER_DOMAIN_TRANSCODER_A)
+
+/* Used by dp and fdi links */
+struct intel_link_m_n {
+ u32 tu;
+ u32 gmch_m;
+ u32 gmch_n;
+ u32 link_m;
+ u32 link_n;
+};
+
+#define for_each_pipe(__dev_priv, __p) \
+ for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++)
+
+#define for_each_pipe_masked(__dev_priv, __p, __mask) \
+ for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
+ for_each_if((__mask) & BIT(__p))
+
+#define for_each_universal_plane(__dev_priv, __pipe, __p) \
+ for ((__p) = 0; \
+ (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \
+ (__p)++)
+
+#define for_each_sprite(__dev_priv, __p, __s) \
+ for ((__s) = 0; \
+ (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \
+ (__s)++)
+
+#define for_each_port_masked(__port, __ports_mask) \
+ for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \
+ for_each_if((__ports_mask) & BIT(__port))
+
+#define for_each_crtc(dev, crtc) \
+ list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head)
+
+#define for_each_intel_plane(dev, intel_plane) \
+ list_for_each_entry(intel_plane, \
+ &(dev)->mode_config.plane_list, \
+ base.head)
+
+#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \
+ list_for_each_entry(intel_plane, \
+ &(dev)->mode_config.plane_list, \
+ base.head) \
+ for_each_if((plane_mask) & \
+ BIT(drm_plane_index(&intel_plane->base)))
+
+#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \
+ list_for_each_entry(intel_plane, \
+ &(dev)->mode_config.plane_list, \
+ base.head) \
+ for_each_if((intel_plane)->pipe == (intel_crtc)->pipe)
+
+#define for_each_intel_crtc(dev, intel_crtc) \
+ list_for_each_entry(intel_crtc, \
+ &(dev)->mode_config.crtc_list, \
+ base.head)
+
+#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \
+ list_for_each_entry(intel_crtc, \
+ &(dev)->mode_config.crtc_list, \
+ base.head) \
+ for_each_if((crtc_mask) & BIT(drm_crtc_index(&intel_crtc->base)))
+
+#define for_each_intel_encoder(dev, intel_encoder) \
+ list_for_each_entry(intel_encoder, \
+ &(dev)->mode_config.encoder_list, \
+ base.head)
+
+#define for_each_intel_connector_iter(intel_connector, iter) \
+ while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter))))
+
+#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
+ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
+ for_each_if((intel_encoder)->base.crtc == (__crtc))
+
+#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \
+ list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \
+ for_each_if((intel_connector)->base.encoder == (__encoder))
+
+#define for_each_power_domain(domain, mask) \
+ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \
+ for_each_if(BIT_ULL(domain) & (mask))
+
+#define for_each_power_well(__dev_priv, __power_well) \
+ for ((__power_well) = (__dev_priv)->power_domains.power_wells; \
+ (__power_well) - (__dev_priv)->power_domains.power_wells < \
+ (__dev_priv)->power_domains.power_well_count; \
+ (__power_well)++)
+
+#define for_each_power_well_rev(__dev_priv, __power_well) \
+ for ((__power_well) = (__dev_priv)->power_domains.power_wells + \
+ (__dev_priv)->power_domains.power_well_count - 1; \
+ (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \
+ (__power_well)--)
+
+#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \
+ for_each_power_well(__dev_priv, __power_well) \
+ for_each_if((__power_well)->domains & (__domain_mask))
+
+#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \
+ for_each_power_well_rev(__dev_priv, __power_well) \
+ for_each_if((__power_well)->domains & (__domain_mask))
+
+#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \
+ for ((__i) = 0; \
+ (__i) < (__state)->base.dev->mode_config.num_total_plane && \
+ ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
+ (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
+ (__i)++) \
+ for_each_if(plane)
+
+#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \
+ for ((__i) = 0; \
+ (__i) < (__state)->base.dev->mode_config.num_crtc && \
+ ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \
+ (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \
+ (__i)++) \
+ for_each_if(crtc)
+
+#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \
+ for ((__i) = 0; \
+ (__i) < (__state)->base.dev->mode_config.num_total_plane && \
+ ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
+ (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \
+ (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
+ (__i)++) \
+ for_each_if(plane)
+
+void intel_link_compute_m_n(int bpp, int nlanes,
+ int pixel_clock, int link_clock,
+ struct intel_link_m_n *m_n,
+ bool reduce_m_n);
+
+#endif
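intel_link_compute_m_n() fills the M/N pairs the hardware uses to time a DP/FDI stream: data M/N compares payload bandwidth (bpp x pixel clock) against total link bandwidth, and link M/N compares pixel clock against link clock. A hedged sketch of that ratio computation, reducing with a gcd rather than the driver's fitting of the pair into fixed-width register fields:

#include <stdint.h>
#include <stdio.h>

struct link_m_n { uint32_t tu, gmch_m, gmch_n, link_m, link_n; };

static uint64_t gcd64(uint64_t a, uint64_t b)
{
	while (b) {
		uint64_t t = a % b;
		a = b;
		b = t;
	}
	return a;
}

/* Simplified: reduce the exact ratio with a gcd; the real code instead
 * scales the pair to fit its M/N register fields. */
static void compute_m_n(uint64_t m, uint64_t n, uint32_t *ret_m, uint32_t *ret_n)
{
	uint64_t g = gcd64(m, n);

	*ret_m = m / g;
	*ret_n = n / g;
}

static void link_compute_m_n(int bpp, int nlanes, int pixel_khz, int link_khz,
			     struct link_m_n *m_n)
{
	m_n->tu = 64;	/* transfer unit size used by the driver */
	/* data M/N: payload rate vs. total link bandwidth */
	compute_m_n((uint64_t)bpp * pixel_khz, (uint64_t)link_khz * nlanes * 8,
		    &m_n->gmch_m, &m_n->gmch_n);
	/* link M/N: pixel clock vs. link symbol clock */
	compute_m_n(pixel_khz, link_khz, &m_n->link_m, &m_n->link_n);
}

int main(void)
{
	struct link_m_n mn;

	/* 1920x1080@60 (~148.5 MHz), 24 bpp, 4 lanes at HBR2 (540 MHz) */
	link_compute_m_n(24, 4, 148500, 540000, &mn);
	printf("data %u/%u link %u/%u tu %u\n",
	       mn.gmch_m, mn.gmch_n, mn.link_m, mn.link_n, mn.tu);
	return 0;
}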
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 510e0bc3a377..ebdcbcbacb3c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1272,6 +1272,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
if (ret)
return ret;
+ /* WaDisableEarlyEOT:cnl */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+
return 0;
}
@@ -1664,6 +1667,35 @@ static void print_request(struct drm_printer *m,
rq->timeline->common->name);
}
+static void hexdump(struct drm_printer *m, const void *buf, size_t len)
+{
+ const size_t rowsize = 8 * sizeof(u32);
+ const void *prev = NULL;
+ bool skip = false;
+ size_t pos;
+
+ for (pos = 0; pos < len; pos += rowsize) {
+ char line[128];
+
+ if (prev && !memcmp(prev, buf + pos, rowsize)) {
+ if (!skip) {
+ drm_printf(m, "*\n");
+ skip = true;
+ }
+ continue;
+ }
+
+ WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+ rowsize, sizeof(u32),
+ line, sizeof(line),
+ false) >= sizeof(line));
+ drm_printf(m, "%08zx %s\n", pos, line);
+
+ prev = buf + pos;
+ skip = false;
+ }
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1757,6 +1789,24 @@ void intel_engine_dump(struct intel_engine_cs *engine,
addr = intel_engine_get_last_batch_head(engine);
drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
upper_32_bits(addr), lower_32_bits(addr));
+ if (INTEL_GEN(dev_priv) >= 8)
+ addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base),
+ RING_DMA_FADD_UDW(engine->mmio_base));
+ else if (INTEL_GEN(dev_priv) >= 4)
+ addr = I915_READ(RING_DMA_FADD(engine->mmio_base));
+ else
+ addr = I915_READ(DMA_FADD_I8XX);
+ drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+ if (INTEL_GEN(dev_priv) >= 4) {
+ drm_printf(m, "\tIPEIR: 0x%08x\n",
+ I915_READ(RING_IPEIR(engine->mmio_base)));
+ drm_printf(m, "\tIPEHR: 0x%08x\n",
+ I915_READ(RING_IPEHR(engine->mmio_base)));
+ } else {
+ drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR));
+ drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR));
+ }
if (HAS_EXECLISTS(dev_priv)) {
const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -1848,8 +1898,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
&engine->irq_posted)),
yesno(test_bit(ENGINE_IRQ_EXECLIST,
&engine->irq_posted)));
+
+ drm_printf(m, "HWSP:\n");
+ hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
- drm_printf(m, "\n");
}
static u8 user_class_map[] = {
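The new hexdump() collapses consecutive identical 32-byte rows into a single '*', hexdump(1)-style, so dumping the whole HWSP page stays readable. The same row-deduplicating loop rendered as standalone C (plain printf in place of the kernel's hex_dump_to_buffer()):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ROWSIZE 32	/* 8 * sizeof(u32), as in the dump above */

/* Assumes len is a multiple of ROWSIZE; words shown little-endian,
 * roughly matching hex_dump_to_buffer() with groupsize 4. */
static void hexdump(const unsigned char *buf, size_t len)
{
	const unsigned char *prev = NULL;
	bool skip = false;

	for (size_t pos = 0; pos < len; pos += ROWSIZE) {
		if (prev && !memcmp(prev, buf + pos, ROWSIZE)) {
			if (!skip) {
				puts("*");	/* run of identical rows */
				skip = true;
			}
			continue;
		}

		printf("%08zx ", pos);
		for (size_t i = 0; i < ROWSIZE; i += 4)
			printf("%02x%02x%02x%02x ",
			       buf[pos + i + 3], buf[pos + i + 2],
			       buf[pos + i + 1], buf[pos + i]);
		putchar('\n');

		prev = buf + pos;
		skip = false;
	}
}

int main(void)
{
	unsigned char page[4096] = { 0 };

	page[0] = 0xde;
	page[1] = 0xad;
	hexdump(page, sizeof(page));	/* rows 0 and 1 print, rest is '*' */
	return 0;
}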
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 0acd9dd3ed5c..31f01d64c021 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -429,18 +429,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
for_each_engine(engine, dev_priv, id) {
- struct intel_engine_hangcheck cur_state, *hc = &cur_state;
const bool busy = intel_engine_has_waiter(engine);
+ struct intel_engine_hangcheck hc;
semaphore_clear_deadlocks(dev_priv);
- hangcheck_load_sample(engine, hc);
- hangcheck_accumulate_sample(engine, hc);
- hangcheck_store_sample(engine, hc);
+ hangcheck_load_sample(engine, &hc);
+ hangcheck_accumulate_sample(engine, &hc);
+ hangcheck_store_sample(engine, &hc);
if (engine->hangcheck.stalled) {
hung |= intel_engine_flag(engine);
- if (hc->action != ENGINE_DEAD)
+ if (hc.action != ENGINE_DEAD)
stuck |= intel_engine_flag(engine);
}
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 49fdf09f9919..ef9f91a0b0c9 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -128,22 +128,46 @@ intel_i2c_reset(struct drm_i915_private *dev_priv)
I915_WRITE(GMBUS4, 0);
}
-static void intel_i2c_quirk_set(struct drm_i915_private *dev_priv, bool enable)
+static void pnv_gmbus_clock_gating(struct drm_i915_private *dev_priv,
+ bool enable)
{
u32 val;
/* When using bit bashing for I2C, this bit needs to be set to 1 */
- if (!IS_PINEVIEW(dev_priv))
- return;
-
val = I915_READ(DSPCLK_GATE_D);
- if (enable)
- val |= DPCUNIT_CLOCK_GATE_DISABLE;
+ if (!enable)
+ val |= PNV_GMBUSUNIT_CLOCK_GATE_DISABLE;
else
- val &= ~DPCUNIT_CLOCK_GATE_DISABLE;
+ val &= ~PNV_GMBUSUNIT_CLOCK_GATE_DISABLE;
I915_WRITE(DSPCLK_GATE_D, val);
}
+static void pch_gmbus_clock_gating(struct drm_i915_private *dev_priv,
+ bool enable)
+{
+ u32 val;
+
+ val = I915_READ(SOUTH_DSPCLK_GATE_D);
+ if (!enable)
+ val |= PCH_GMBUSUNIT_CLOCK_GATE_DISABLE;
+ else
+ val &= ~PCH_GMBUSUNIT_CLOCK_GATE_DISABLE;
+ I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+}
+
+static void bxt_gmbus_clock_gating(struct drm_i915_private *dev_priv,
+ bool enable)
+{
+ u32 val;
+
+ val = I915_READ(GEN9_CLKGATE_DIS_4);
+ if (!enable)
+ val |= BXT_GMBUS_GATING_DIS;
+ else
+ val &= ~BXT_GMBUS_GATING_DIS;
+ I915_WRITE(GEN9_CLKGATE_DIS_4, val);
+}
+
static u32 get_reserved(struct intel_gmbus *bus)
{
struct drm_i915_private *dev_priv = bus->dev_priv;
@@ -221,7 +245,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter)
struct drm_i915_private *dev_priv = bus->dev_priv;
intel_i2c_reset(dev_priv);
- intel_i2c_quirk_set(dev_priv, true);
+
+ if (IS_PINEVIEW(dev_priv))
+ pnv_gmbus_clock_gating(dev_priv, false);
+
set_data(bus, 1);
set_clock(bus, 1);
udelay(I2C_RISEFALL_TIME);
@@ -238,7 +265,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter)
set_data(bus, 1);
set_clock(bus, 1);
- intel_i2c_quirk_set(dev_priv, false);
+
+ if (IS_PINEVIEW(dev_priv))
+ pnv_gmbus_clock_gating(dev_priv, true);
}
static void
@@ -481,6 +510,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
int i = 0, inc, try = 0;
int ret = 0;
+ /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */
+ if (IS_GEN9_LP(dev_priv))
+ bxt_gmbus_clock_gating(dev_priv, false);
+ else if (HAS_PCH_SPT(dev_priv) ||
+ HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv))
+ pch_gmbus_clock_gating(dev_priv, false);
+
retry:
I915_WRITE_FW(GMBUS0, bus->reg0);
@@ -582,6 +618,13 @@ timeout:
ret = -EAGAIN;
out:
+ /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */
+ if (IS_GEN9_LP(dev_priv))
+ bxt_gmbus_clock_gating(dev_priv, true);
+ else if (HAS_PCH_SPT(dev_priv) ||
+ HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv))
+ pch_gmbus_clock_gating(dev_priv, true);
+
return ret;
}
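
The intel_i2c.c change above splits the old Pineview-only quirk into per-platform helpers and brackets each GMBUS transfer with a clock-gating disable/enable pair (Display WA #0868). The underlying idiom is a read-modify-write of a gating-disable bit around the critical section; a minimal sketch follows, with the register offset and bit as hypothetical stand-ins rather than real i915 values:

    /* Sketch of the disable-around-transfer idiom; CLKGATE_REG and the
     * gating bit are hypothetical stand-ins, not real hardware values. */
    #include <linux/io.h>
    #include <linux/types.h>

    #define CLKGATE_REG              0x6200      /* hypothetical */
    #define UNIT_CLOCK_GATE_DISABLE  (1 << 14)   /* hypothetical */

    static void unit_clock_gating(void __iomem *mmio, bool enable)
    {
            u32 val = readl(mmio + CLKGATE_REG);

            /* the bit *disables* gating, so set it while gating must be off */
            if (!enable)
                    val |= UNIT_CLOCK_GATE_DISABLE;
            else
                    val &= ~UNIT_CLOCK_GATE_DISABLE;
            writel(val, mmio + CLKGATE_REG);
    }

    static int do_transfer(void __iomem *mmio)
    {
            int ret = 0;

            unit_clock_gating(mmio, false); /* keep the unit clocked */
            /* ... perform the GMBUS or bit-banged transfer here ... */
            unit_clock_gating(mmio, true);  /* restore gating on every exit path */
            return ret;
    }
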
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 3bf65288ffff..5809b29044fc 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
};
if (!pci_dev_present(atom_hdaudio_ids)) {
- DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n");
+ DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
lpe_present = true;
}
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2e38fbfdf08f..739c33b07c59 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -449,7 +449,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
engine->name, n,
- rq->ctx->hw_id, count,
+ port[n].context_id, count,
rq->global_seqno);
} else {
GEM_BUG_ON(!n);
@@ -504,7 +504,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
ce->ring->tail &= (ce->ring->size - 1);
ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
- GEM_TRACE("\n");
+ GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; )
elsp_write(0, engine->execlists.elsp);
@@ -861,9 +861,10 @@ static void execlists_submission_tasklet(unsigned long data)
*/
status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
- GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n",
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
engine->name, head,
- status, buf[2*head + 1]);
+ status, buf[2*head + 1],
+ execlists->active);
if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
GEN8_CTX_STATUS_PREEMPTED))
@@ -881,6 +882,8 @@ static void execlists_submission_tasklet(unsigned long data)
if (status & GEN8_CTX_STATUS_COMPLETE &&
buf[2*head + 1] == PREEMPT_ID) {
+ GEM_TRACE("%s preempt-idle\n", engine->name);
+
execlists_cancel_port_requests(execlists);
execlists_unwind_incomplete_requests(execlists);
@@ -905,8 +908,8 @@ static void execlists_submission_tasklet(unsigned long data)
rq = port_unpack(port, &count);
GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
engine->name,
- rq->ctx->hw_id, count,
- rq->global_seqno);
+ port->context_id, count,
+ rq ? rq->global_seqno : 0);
GEM_BUG_ON(count == 0);
if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
@@ -1555,6 +1558,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
struct intel_context *ce;
unsigned long flags;
+ GEM_TRACE("%s seqno=%x\n",
+ engine->name, request ? request->global_seqno : 0);
spin_lock_irqsave(&engine->timeline->lock, flags);
/*
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index fc65f5e451dd..c58e5f53bab0 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -32,6 +32,8 @@
#include <drm/drmP.h>
#include <drm/i915_drm.h>
+
+#include "intel_opregion.h"
#include "i915_drv.h"
#include "intel_drv.h"
diff --git a/drivers/gpu/drm/i915/intel_opregion.h b/drivers/gpu/drm/i915/intel_opregion.h
new file mode 100644
index 000000000000..e0e437ba9e51
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_opregion.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2008-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_OPREGION_H_
+#define _INTEL_OPREGION_H_
+
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+
+struct drm_i915_private;
+struct intel_encoder;
+
+struct opregion_header;
+struct opregion_acpi;
+struct opregion_swsci;
+struct opregion_asle;
+
+struct intel_opregion {
+ struct opregion_header *header;
+ struct opregion_acpi *acpi;
+ struct opregion_swsci *swsci;
+ u32 swsci_gbda_sub_functions;
+ u32 swsci_sbcb_sub_functions;
+ struct opregion_asle *asle;
+ void *rvda;
+ void *vbt_firmware;
+ const void *vbt;
+ u32 vbt_size;
+ u32 *lid_state;
+ struct work_struct asle_work;
+};
+
+#define OPREGION_SIZE (8 * 1024)
+
+#ifdef CONFIG_ACPI
+
+int intel_opregion_setup(struct drm_i915_private *dev_priv);
+void intel_opregion_register(struct drm_i915_private *dev_priv);
+void intel_opregion_unregister(struct drm_i915_private *dev_priv);
+void intel_opregion_asle_intr(struct drm_i915_private *dev_priv);
+int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
+ bool enable);
+int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv,
+ pci_power_t state);
+int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv);
+
+#else /* CONFIG_ACPI */
+
+static inline int intel_opregion_setup(struct drm_i915_private *dev_priv)
+{
+ return 0;
+}
+
+static inline void intel_opregion_register(struct drm_i915_private *dev_priv)
+{
+}
+
+static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv)
+{
+}
+
+static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv)
+{
+}
+
+static inline int
+intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable)
+{
+ return 0;
+}
+
+static inline int
+intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state)
+{
+ return 0;
+}
+
+static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev)
+{
+ return -ENODEV;
+}
+
+#endif /* CONFIG_ACPI */
+
+#endif
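
The new header uses the standard kernel idiom for optional subsystems: real prototypes when CONFIG_ACPI is set, static inline no-ops otherwise, so call sites never need their own #ifdef blocks. A condensed sketch of the idiom, with hypothetical names:

    /* foo.h -- hypothetical example of the CONFIG-guarded stub idiom */
    struct device;

    #ifdef CONFIG_FOO
    int foo_setup(struct device *dev);
    void foo_teardown(struct device *dev);
    #else
    static inline int foo_setup(struct device *dev)
    {
            return 0;       /* report success so callers need no #ifdef */
    }
    static inline void foo_teardown(struct device *dev)
    {
    }
    #endif /* CONFIG_FOO */
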
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 18779c6eb4bf..1db79a860b96 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -8447,6 +8447,11 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
val |= SARBUNIT_CLKGATE_DIS;
I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
+
+ /* WaDisableVFclkgate:cnl */
+ val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
+ val |= VFUNIT_CLKGATE_DIS;
+ I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
}
static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index a1ad85fa5c1a..2e32615eeada 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp,
struct drm_i915_private *dev_priv = to_i915(dev);
if (dev_priv->psr.active) {
- i915_reg_t psr_ctl;
+ i915_reg_t psr_status;
u32 psr_status_mask;
if (dev_priv->psr.aux_frame_sync)
@@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp,
0);
if (dev_priv->psr.psr2_support) {
- psr_ctl = EDP_PSR2_CTL;
+ psr_status = EDP_PSR2_STATUS_CTL;
psr_status_mask = EDP_PSR2_STATUS_STATE_MASK;
- I915_WRITE(psr_ctl,
- I915_READ(psr_ctl) &
+ I915_WRITE(EDP_PSR2_CTL,
+ I915_READ(EDP_PSR2_CTL) &
~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE));
} else {
- psr_ctl = EDP_PSR_STATUS_CTL;
+ psr_status = EDP_PSR_STATUS_CTL;
psr_status_mask = EDP_PSR_STATUS_STATE_MASK;
- I915_WRITE(psr_ctl,
- I915_READ(psr_ctl) & ~EDP_PSR_ENABLE);
+ I915_WRITE(EDP_PSR_CTL,
+ I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
}
/* Wait till PSR is idle */
if (intel_wait_for_register(dev_priv,
- psr_ctl, psr_status_mask, 0,
+ psr_status, psr_status_mask, 0,
2000))
DRM_ERROR("Timed out waiting for PSR Idle State\n");
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 96ab74f3d101..db9d57f39534 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -1726,13 +1726,13 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT_ULL(POWER_DOMAIN_AUX_C) | \
BIT_ULL(POWER_DOMAIN_AUDIO) | \
BIT_ULL(POWER_DOMAIN_VGA) | \
- BIT_ULL(POWER_DOMAIN_GMBUS) | \
BIT_ULL(POWER_DOMAIN_INIT))
#define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \
BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
BIT_ULL(POWER_DOMAIN_GT_IRQ) | \
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) | \
+ BIT_ULL(POWER_DOMAIN_GMBUS) | \
BIT_ULL(POWER_DOMAIN_INIT))
#define BXT_DPIO_CMN_A_POWER_DOMAINS ( \
BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \
@@ -1792,6 +1792,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT_ULL(POWER_DOMAIN_GT_IRQ) | \
BIT_ULL(POWER_DOMAIN_MODESET) | \
BIT_ULL(POWER_DOMAIN_AUX_A) | \
+ BIT_ULL(POWER_DOMAIN_GMBUS) | \
BIT_ULL(POWER_DOMAIN_INIT))
#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index e6b31041cc88..2ea69394f428 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1637,7 +1637,7 @@ static int igt_shrink_thp(void *arg)
* shmem to truncate our pages.
*/
i915_gem_shrink_all(i915);
- if (!IS_ERR_OR_NULL(obj->mm.pages)) {
+ if (i915_gem_object_has_pages(obj)) {
pr_err("shrink-all didn't truncate the pages\n");
err = -EINVAL;
goto out_close;
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index f98546b8a7fa..d1f91a533afa 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -132,6 +132,12 @@ static int emit_recurse_batch(struct hang *h,
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = upper_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
*batch++ = lower_32_bits(vma->node.start);
*batch++ = upper_32_bits(vma->node.start);
@@ -140,6 +146,12 @@ static int emit_recurse_batch(struct hang *h,
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
*batch++ = lower_32_bits(vma->node.start);
} else if (INTEL_GEN(i915) >= 4) {
@@ -147,12 +159,24 @@ static int emit_recurse_batch(struct hang *h,
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
*batch++ = lower_32_bits(vma->node.start);
} else {
*batch++ = MI_STORE_DWORD_IMM;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
*batch++ = lower_32_bits(vma->node.start);
}
@@ -234,6 +258,16 @@ static void hang_fini(struct hang *h)
i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}
+static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
+{
+ return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
+ rq->fence.seqno),
+ 10) &&
+ wait_for(i915_seqno_passed(hws_seqno(h, rq),
+ rq->fence.seqno),
+ 1000));
+}
+
static int igt_hang_sanitycheck(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -296,6 +330,9 @@ static void global_reset_lock(struct drm_i915_private *i915)
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ pr_debug("%s: current gpu_error=%08lx\n",
+ __func__, i915->gpu_error.flags);
+
while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
wait_event(i915->gpu_error.reset_queue,
!test_bit(I915_RESET_BACKOFF,
@@ -353,54 +390,128 @@ static int igt_global_reset(void *arg)
return err;
}
-static int igt_reset_engine(void *arg)
+static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
{
- struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- unsigned int reset_count, reset_engine_count;
+ struct hang h;
int err = 0;
- /* Check that we can issue a global GPU and engine reset */
+ /* Check that we can issue an engine reset on an idle engine (no-op) */
if (!intel_has_reset_engine(i915))
return 0;
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ err = hang_init(&h, i915);
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (err)
+ return err;
+ }
+
for_each_engine(engine, i915, id) {
- set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
+ unsigned int reset_count, reset_engine_count;
+ IGT_TIMEOUT(end_time);
+
+ if (active && !intel_engine_can_store_dword(engine))
+ continue;
+
reset_count = i915_reset_count(&i915->gpu_error);
reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
engine);
- err = i915_reset_engine(engine, I915_RESET_QUIET);
- if (err) {
- pr_err("i915_reset_engine failed\n");
- break;
- }
+ set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ do {
+ if (active) {
+ struct drm_i915_gem_request *rq;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ rq = hang_create_request(&h, engine,
+ i915->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ mutex_unlock(&i915->drm.struct_mutex);
+ break;
+ }
+
+ i915_gem_request_get(rq);
+ __i915_add_request(rq, true);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ if (!wait_for_hang(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to start request %x, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ i915_gem_request_put(rq);
+ err = -EIO;
+ break;
+ }
- if (i915_reset_count(&i915->gpu_error) != reset_count) {
- pr_err("Full GPU reset recorded! (engine reset expected)\n");
- err = -EINVAL;
- break;
- }
+ i915_gem_request_put(rq);
+ }
+
+ engine->hangcheck.stalled = true;
+ engine->hangcheck.seqno =
+ intel_engine_get_seqno(engine);
+
+ err = i915_reset_engine(engine, I915_RESET_QUIET);
+ if (err) {
+ pr_err("i915_reset_engine failed\n");
+ break;
+ }
+
+ if (i915_reset_count(&i915->gpu_error) != reset_count) {
+ pr_err("Full GPU reset recorded! (engine reset expected)\n");
+ err = -EINVAL;
+ break;
+ }
+
+ reset_engine_count += active;
+ if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+ reset_engine_count) {
+ pr_err("%s engine reset %srecorded!\n",
+ engine->name, active ? "not " : "");
+ err = -EINVAL;
+ break;
+ }
+
+ engine->hangcheck.stalled = false;
+ } while (time_before(jiffies, end_time));
+ clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- if (i915_reset_engine_count(&i915->gpu_error, engine) ==
- reset_engine_count) {
- pr_err("No %s engine reset recorded!\n", engine->name);
- err = -EINVAL;
+ if (err)
break;
- }
- clear_bit(I915_RESET_ENGINE + engine->id,
- &i915->gpu_error.flags);
+ cond_resched();
}
if (i915_terminally_wedged(&i915->gpu_error))
err = -EIO;
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ hang_fini(&h);
+ mutex_unlock(&i915->drm.struct_mutex);
+ }
+
return err;
}
+static int igt_reset_idle_engine(void *arg)
+{
+ return __igt_reset_engine(arg, false);
+}
+
+static int igt_reset_active_engine(void *arg)
+{
+ return __igt_reset_engine(arg, true);
+}
+
static int active_engine(void *data)
{
struct intel_engine_cs *engine = data;
@@ -462,11 +573,12 @@ err_file:
return err;
}
-static int igt_reset_active_engines(void *arg)
+static int __igt_reset_engine_others(struct drm_i915_private *i915,
+ bool active)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine, *active;
+ struct intel_engine_cs *engine, *other;
enum intel_engine_id id, tmp;
+ struct hang h;
int err = 0;
/* Check that issuing a reset on one engine does not interfere
@@ -476,24 +588,36 @@ static int igt_reset_active_engines(void *arg)
if (!intel_has_reset_engine(i915))
return 0;
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ err = hang_init(&h, i915);
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (err)
+ return err;
+ }
+
for_each_engine(engine, i915, id) {
- struct task_struct *threads[I915_NUM_ENGINES];
+ struct task_struct *threads[I915_NUM_ENGINES] = {};
unsigned long resets[I915_NUM_ENGINES];
unsigned long global = i915_reset_count(&i915->gpu_error);
+ unsigned long count = 0;
IGT_TIMEOUT(end_time);
+ if (active && !intel_engine_can_store_dword(engine))
+ continue;
+
memset(threads, 0, sizeof(threads));
- for_each_engine(active, i915, tmp) {
+ for_each_engine(other, i915, tmp) {
struct task_struct *tsk;
- if (active == engine)
- continue;
-
resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
- active);
+ other);
- tsk = kthread_run(active_engine, active,
- "igt/%s", active->name);
+ if (other == engine)
+ continue;
+
+ tsk = kthread_run(active_engine, other,
+ "igt/%s", other->name);
if (IS_ERR(tsk)) {
err = PTR_ERR(tsk);
goto unwind;
@@ -503,20 +627,70 @@ static int igt_reset_active_engines(void *arg)
get_task_struct(tsk);
}
- set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
+ set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
do {
+ if (active) {
+ struct drm_i915_gem_request *rq;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ rq = hang_create_request(&h, engine,
+ i915->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ mutex_unlock(&i915->drm.struct_mutex);
+ break;
+ }
+
+ i915_gem_request_get(rq);
+ __i915_add_request(rq, true);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ if (!wait_for_hang(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to start request %x, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ i915_gem_request_put(rq);
+ err = -EIO;
+ break;
+ }
+
+ i915_gem_request_put(rq);
+ }
+
+ engine->hangcheck.stalled = true;
+ engine->hangcheck.seqno =
+ intel_engine_get_seqno(engine);
+
err = i915_reset_engine(engine, I915_RESET_QUIET);
if (err) {
- pr_err("i915_reset_engine(%s) failed, err=%d\n",
- engine->name, err);
+ pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
+ engine->name, active ? "active" : "idle", err);
break;
}
+
+ engine->hangcheck.stalled = false;
+ count++;
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + engine->id,
- &i915->gpu_error.flags);
+ clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ pr_info("i915_reset_engine(%s:%s): %lu resets\n",
+ engine->name, active ? "active" : "idle", count);
+
+ if (i915_reset_engine_count(&i915->gpu_error, engine) -
+ resets[engine->id] != (active ? count : 0)) {
+ pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
+ engine->name, active ? "active" : "idle", count,
+ i915_reset_engine_count(&i915->gpu_error,
+ engine) - resets[engine->id]);
+ if (!err)
+ err = -EINVAL;
+ }
unwind:
- for_each_engine(active, i915, tmp) {
+ for_each_engine(other, i915, tmp) {
int ret;
if (!threads[tmp])
@@ -524,27 +698,29 @@ unwind:
ret = kthread_stop(threads[tmp]);
if (ret) {
- pr_err("kthread for active engine %s failed, err=%d\n",
- active->name, ret);
+ pr_err("kthread for other engine %s failed, err=%d\n",
+ other->name, ret);
if (!err)
err = ret;
}
put_task_struct(threads[tmp]);
if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
- active)) {
+ other)) {
pr_err("Innocent engine %s was reset (count=%ld)\n",
- active->name,
+ other->name,
i915_reset_engine_count(&i915->gpu_error,
- active) - resets[tmp]);
- err = -EIO;
+ other) - resets[tmp]);
+ if (!err)
+ err = -EINVAL;
}
}
if (global != i915_reset_count(&i915->gpu_error)) {
pr_err("Global reset (count=%ld)!\n",
i915_reset_count(&i915->gpu_error) - global);
- err = -EIO;
+ if (!err)
+ err = -EINVAL;
}
if (err)
@@ -556,9 +732,25 @@ unwind:
if (i915_terminally_wedged(&i915->gpu_error))
err = -EIO;
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ hang_fini(&h);
+ mutex_unlock(&i915->drm.struct_mutex);
+ }
+
return err;
}
+static int igt_reset_idle_engine_others(void *arg)
+{
+ return __igt_reset_engine_others(arg, false);
+}
+
+static int igt_reset_active_engine_others(void *arg)
+{
+ return __igt_reset_engine_others(arg, true);
+}
+
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
u32 reset_count;
@@ -574,16 +766,6 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
return reset_count;
}
-static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
-{
- return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
- rq->fence.seqno),
- 10) &&
- wait_for(i915_seqno_passed(hws_seqno(h, rq),
- rq->fence.seqno),
- 1000));
-}
-
static int igt_wait_reset(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -617,8 +799,8 @@ static int igt_wait_reset(void *arg)
if (!wait_for_hang(&h, rq)) {
struct drm_printer p = drm_info_printer(i915->drm.dev);
- pr_err("Failed to start request %x, at %x\n",
- rq->fence.seqno, hws_seqno(&h, rq));
+ pr_err("%s: Failed to start request %x, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
i915_reset(i915, 0);
@@ -712,8 +894,8 @@ static int igt_reset_queue(void *arg)
if (!wait_for_hang(&h, prev)) {
struct drm_printer p = drm_info_printer(i915->drm.dev);
- pr_err("Failed to start request %x, at %x\n",
- prev->fence.seqno, hws_seqno(&h, prev));
+ pr_err("%s: Failed to start request %x, at %x\n",
+ __func__, prev->fence.seqno, hws_seqno(&h, prev));
intel_engine_dump(prev->engine, &p,
"%s\n", prev->engine->name);
@@ -819,8 +1001,8 @@ static int igt_handle_error(void *arg)
if (!wait_for_hang(&h, rq)) {
struct drm_printer p = drm_info_printer(i915->drm.dev);
- pr_err("Failed to start request %x, at %x\n",
- rq->fence.seqno, hws_seqno(&h, rq));
+ pr_err("%s: Failed to start request %x, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
i915_reset(i915, 0);
@@ -864,21 +1046,26 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(igt_global_reset), /* attempt to recover GPU first */
SUBTEST(igt_hang_sanitycheck),
- SUBTEST(igt_reset_engine),
- SUBTEST(igt_reset_active_engines),
+ SUBTEST(igt_reset_idle_engine),
+ SUBTEST(igt_reset_active_engine),
+ SUBTEST(igt_reset_idle_engine_others),
+ SUBTEST(igt_reset_active_engine_others),
SUBTEST(igt_wait_reset),
SUBTEST(igt_reset_queue),
SUBTEST(igt_handle_error),
};
+ bool saved_hangcheck;
int err;
if (!intel_has_gpu_reset(i915))
return 0;
intel_runtime_pm_get(i915);
+ saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
err = i915_subtests(tests, i915);
+ i915_modparams.enable_hangcheck = saved_hangcheck;
intel_runtime_pm_put(i915);
return err;
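
The selftest rework above leans on wait_for_hang(), which spins for roughly 10us to catch the fast path and only then falls back to a sleeping poll of up to a second, and it zeroes i915_modparams.enable_hangcheck around the subtests so the real hangcheck cannot race the injected hangs. A generic sketch of that two-stage wait, with hypothetical helper names:

    #include <linux/delay.h>
    #include <linux/kernel.h>
    #include <linux/ktime.h>

    /* Hypothetical two-stage wait mirroring wait_for_us()/wait_for():
     * a short busy poll for the common fast path, then a sleeping poll. */
    static bool wait_for_cond(bool (*cond)(void *data), void *data)
    {
            ktime_t end = ktime_add_us(ktime_get(), 10);

            while (ktime_before(ktime_get(), end)) {        /* ~10us busy poll */
                    if (cond(data))
                            return true;
                    cpu_relax();
            }

            end = ktime_add_ms(ktime_get(), 1000);          /* up to 1s, sleeping */
            while (ktime_before(ktime_get(), end)) {
                    if (cond(data))
                            return true;
                    usleep_range(100, 200);
            }
            return cond(data);      /* one last check after the deadline */
    }
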
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 3f2b4afcb8a7..1d053bbefc02 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -257,6 +257,7 @@ static int imx_drm_bind(struct device *dev)
drm->mode_config.max_height = 4096;
drm->mode_config.funcs = &imx_drm_mode_config_funcs;
drm->mode_config.helper_private = &imx_drm_mode_config_helpers;
+ drm->mode_config.allow_fb_modifiers = true;
drm_mode_config_init(drm);
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 5a67daedcf4d..57ed56d8623f 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -77,6 +77,18 @@ static const uint32_t ipu_plane_formats[] = {
DRM_FORMAT_BGRX8888_A8,
};
+static const uint64_t ipu_format_modifiers[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+};
+
+static const uint64_t pre_format_modifiers[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_VIVANTE_TILED,
+ DRM_FORMAT_MOD_VIVANTE_SUPER_TILED,
+ DRM_FORMAT_MOD_INVALID
+};
+
int ipu_plane_irq(struct ipu_plane *ipu_plane)
{
return ipu_idmac_channel_irq(ipu_plane->ipu, ipu_plane->ipu_ch,
@@ -303,6 +315,22 @@ void ipu_plane_destroy_state(struct drm_plane *plane,
kfree(ipu_state);
}
+static bool ipu_plane_format_mod_supported(struct drm_plane *plane,
+ uint32_t format, uint64_t modifier)
+{
+ struct ipu_soc *ipu = to_ipu_plane(plane)->ipu;
+
+ /* linear is supported for all planes and formats */
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ return true;
+
+ /* without a PRG there are no supported modifiers */
+ if (!ipu_prg_present(ipu))
+ return false;
+
+ return ipu_prg_format_supported(ipu, format, modifier);
+}
+
static const struct drm_plane_funcs ipu_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
@@ -310,6 +338,7 @@ static const struct drm_plane_funcs ipu_plane_funcs = {
.reset = ipu_plane_state_reset,
.atomic_duplicate_state = ipu_plane_duplicate_state,
.atomic_destroy_state = ipu_plane_destroy_state,
+ .format_mod_supported = ipu_plane_format_mod_supported,
};
static int ipu_plane_atomic_check(struct drm_plane *plane,
@@ -550,8 +579,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id,
drm_rect_width(&state->src) >> 16,
drm_rect_height(&state->src) >> 16,
- fb->pitches[0],
- fb->format->format, &eba);
+ fb->pitches[0], fb->format->format,
+ fb->modifier, &eba);
}
if (old_state->fb && !drm_atomic_crtc_needs_modeset(crtc_state)) {
@@ -700,18 +729,71 @@ static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = {
int ipu_planes_assign_pre(struct drm_device *dev,
struct drm_atomic_state *state)
{
+ struct drm_crtc_state *old_crtc_state, *crtc_state;
struct drm_plane_state *plane_state;
+ struct ipu_plane_state *ipu_state;
+ struct ipu_plane *ipu_plane;
struct drm_plane *plane;
+ struct drm_crtc *crtc;
int available_pres = ipu_prg_max_active_channels();
- int i;
+ int ret, i;
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, crtc_state, i) {
+ ret = drm_atomic_add_affected_planes(state, crtc);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * We are going over the planes in 2 passes: first we assign PREs to
+ * planes with a tiling modifier, which need the PREs to resolve into
+ * linear. Any failure to assign a PRE there is fatal. In the second
+ * pass we try to assign PREs to linear FBs, to improve memory access
+ * patterns for them. Failure at this point is non-fatal, as we can
+ * scan out linear FBs without a PRE.
+ */
for_each_new_plane_in_state(state, plane, plane_state, i) {
- struct ipu_plane_state *ipu_state =
- to_ipu_plane_state(plane_state);
- struct ipu_plane *ipu_plane = to_ipu_plane(plane);
+ ipu_state = to_ipu_plane_state(plane_state);
+ ipu_plane = to_ipu_plane(plane);
+
+ if (!plane_state->fb) {
+ ipu_state->use_pre = false;
+ continue;
+ }
+
+ if (!(plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) ||
+ plane_state->fb->modifier == DRM_FORMAT_MOD_LINEAR)
+ continue;
+
+ if (!ipu_prg_present(ipu_plane->ipu) || !available_pres)
+ return -EINVAL;
+
+ if (!ipu_prg_format_supported(ipu_plane->ipu,
+ plane_state->fb->format->format,
+ plane_state->fb->modifier))
+ return -EINVAL;
+
+ ipu_state->use_pre = true;
+ available_pres--;
+ }
+
+ for_each_new_plane_in_state(state, plane, plane_state, i) {
+ ipu_state = to_ipu_plane_state(plane_state);
+ ipu_plane = to_ipu_plane(plane);
+
+ if (!plane_state->fb) {
+ ipu_state->use_pre = false;
+ continue;
+ }
+
+ if ((plane_state->fb->flags & DRM_MODE_FB_MODIFIERS) &&
+ plane_state->fb->modifier != DRM_FORMAT_MOD_LINEAR)
+ continue;
+
+ /* make sure that modifier is initialized */
+ plane_state->fb->modifier = DRM_FORMAT_MOD_LINEAR;
if (ipu_prg_present(ipu_plane->ipu) && available_pres &&
- plane_state->fb &&
ipu_prg_format_supported(ipu_plane->ipu,
plane_state->fb->format->format,
plane_state->fb->modifier)) {
@@ -731,6 +813,7 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
enum drm_plane_type type)
{
struct ipu_plane *ipu_plane;
+ const uint64_t *modifiers = ipu_format_modifiers;
int ret;
DRM_DEBUG_KMS("channel %d, dp flow %d, possible_crtcs=0x%x\n",
@@ -746,10 +829,13 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
ipu_plane->dma = dma;
ipu_plane->dp_flow = dp;
+ if (ipu_prg_present(ipu))
+ modifiers = pre_format_modifiers;
+
ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs,
&ipu_plane_funcs, ipu_plane_formats,
ARRAY_SIZE(ipu_plane_formats),
- NULL, type, NULL);
+ modifiers, type, NULL);
if (ret) {
DRM_ERROR("failed to initialize plane\n");
kfree(ipu_plane);
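
ipu_planes_assign_pre() now makes two passes over the planes: the first hands PREs to tiled framebuffers, where failure is fatal because tiled scanout cannot work without a PRE resolving to linear; the second opportunistically assigns the leftovers to linear framebuffers. A stripped-down sketch of that two-pass allocation, with hypothetical types:

    #include <linux/errno.h>
    #include <linux/types.h>

    /* Hypothetical two-pass allocator mirroring ipu_planes_assign_pre() */
    struct item {
            bool tiled;     /* needs the resource to scan out at all */
            bool assigned;
    };

    static int assign_resources(struct item *items, int n, int avail)
    {
            int i;

            for (i = 0; i < n; i++) {       /* pass 1: mandatory consumers */
                    if (!items[i].tiled)
                            continue;
                    if (!avail)
                            return -EINVAL; /* tiled without a resource is fatal */
                    items[i].assigned = true;
                    avail--;
            }

            for (i = 0; i < n; i++) {       /* pass 2: best-effort consumers */
                    if (items[i].tiled || !avail)
                            continue;
                    items[i].assigned = true;       /* linear works either way */
                    avail--;
            }
            return 0;
    }
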
diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index 82dcb20cdaa3..fd02506274da 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -290,11 +290,6 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev)
return -ENOMEM;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- DRM_ERROR("Unable to get resource\n");
- return -ENODEV;
- }
-
dsi->base = devm_ioremap_resource(dev, res);
if (IS_ERR(dsi->base)) {
DRM_ERROR("Unable to get dsi registers\n");
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 394613b0fd46..6dc5d4ec4e17 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -901,12 +901,6 @@ int ltdc_load(struct drm_device *ddev)
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- DRM_ERROR("Unable to get resource\n");
- ret = -ENODEV;
- goto err;
- }
-
ldev->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(ldev->regs)) {
DRM_ERROR("Unable to get ltdc registers\n");
diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig
index 87a20b3dcf7a..fe6f8c5b4445 100644
--- a/drivers/gpu/ipu-v3/Kconfig
+++ b/drivers/gpu/ipu-v3/Kconfig
@@ -1,7 +1,9 @@
config IMX_IPUV3_CORE
tristate "IPUv3 core support"
- depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM
+ depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM || COMPILE_TEST
depends on DRM || !DRM # if DRM=m, this can't be 'y'
+ select BITREVERSE
+ select GENERIC_ALLOCATOR if DRM
select GENERIC_IRQ_CHIP
help
Choose this if you have an i.MX5/6 system and want to use the Image
diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index 1cb82f445f91..bb9c087e6c0d 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/bitrev.h>
#include <linux/io.h>
+#include <linux/sizes.h>
#include <drm/drm_fourcc.h>
#include "ipu-prv.h"
diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
index 321eb983c2f5..67cc820253a9 100644
--- a/drivers/gpu/ipu-v3/ipu-ic.c
+++ b/drivers/gpu/ipu-v3/ipu-ic.c
@@ -17,6 +17,7 @@
#include <linux/bitrev.h>
#include <linux/io.h>
#include <linux/err.h>
+#include <linux/sizes.h>
#include "ipu-prv.h"
/* IC Register Offsets */
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index c860a7997cb5..f1cec3d70498 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -49,6 +49,10 @@
#define IPU_PRE_TPR_CTRL 0x070
#define IPU_PRE_TPR_CTRL_TILE_FORMAT(v) ((v & 0xff) << 0)
#define IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK 0xff
+#define IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT (1 << 0)
+#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SPLIT_BUF (1 << 4)
+#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF (1 << 5)
+#define IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED (1 << 6)
#define IPU_PRE_PREFETCH_ENG_CTRL 0x080
#define IPU_PRE_PREF_ENG_CTRL_PREFETCH_EN (1 << 0)
@@ -147,7 +151,7 @@ int ipu_pre_get(struct ipu_pre *pre)
val = IPU_PRE_CTRL_HANDSHAKE_ABORT_SKIP_EN |
IPU_PRE_CTRL_HANDSHAKE_EN |
IPU_PRE_CTRL_TPR_REST_SEL |
- IPU_PRE_CTRL_BLOCK_16 | IPU_PRE_CTRL_SDW_UPDATE;
+ IPU_PRE_CTRL_SDW_UPDATE;
writel(val, pre->regs + IPU_PRE_CTRL);
pre->in_use = true;
@@ -163,14 +167,17 @@ void ipu_pre_put(struct ipu_pre *pre)
void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
unsigned int height, unsigned int stride, u32 format,
- unsigned int bufaddr)
+ uint64_t modifier, unsigned int bufaddr)
{
const struct drm_format_info *info = drm_format_info(format);
u32 active_bpp = info->cpp[0] >> 1;
u32 val;
/* calculate safe window for ctrl register updates */
- pre->safe_window_end = height - 2;
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ pre->safe_window_end = height - 2;
+ else
+ pre->safe_window_end = DIV_ROUND_UP(height, 4) - 1;
writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF);
writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF);
@@ -203,9 +210,25 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
writel(pre->buffer_paddr, pre->regs + IPU_PRE_STORE_ENG_ADDR);
+ val = readl(pre->regs + IPU_PRE_TPR_CTRL);
+ val &= ~IPU_PRE_TPR_CTRL_TILE_FORMAT_MASK;
+ if (modifier != DRM_FORMAT_MOD_LINEAR) {
+ /* only support single buffer formats for now */
+ val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SINGLE_BUF;
+ if (modifier == DRM_FORMAT_MOD_VIVANTE_SUPER_TILED)
+ val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_SUPER_TILED;
+ if (info->cpp[0] == 2)
+ val |= IPU_PRE_TPR_CTRL_TILE_FORMAT_16_BIT;
+ }
+ writel(val, pre->regs + IPU_PRE_TPR_CTRL);
+
val = readl(pre->regs + IPU_PRE_CTRL);
val |= IPU_PRE_CTRL_EN_REPEAT | IPU_PRE_CTRL_ENABLE |
IPU_PRE_CTRL_SDW_UPDATE;
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ val &= ~IPU_PRE_CTRL_BLOCK_EN;
+ else
+ val |= IPU_PRE_CTRL_BLOCK_EN;
writel(val, pre->regs + IPU_PRE_CTRL);
}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 0013ca9f72c8..067365c733c6 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <video/imx-ipu-v3.h>
@@ -132,28 +133,25 @@ bool ipu_prg_format_supported(struct ipu_soc *ipu, uint32_t format,
if (info->num_planes != 1)
return false;
- return true;
+ switch (modifier) {
+ case DRM_FORMAT_MOD_LINEAR:
+ case DRM_FORMAT_MOD_VIVANTE_TILED:
+ case DRM_FORMAT_MOD_VIVANTE_SUPER_TILED:
+ return true;
+ default:
+ return false;
+ }
}
EXPORT_SYMBOL_GPL(ipu_prg_format_supported);
int ipu_prg_enable(struct ipu_soc *ipu)
{
struct ipu_prg *prg = ipu->prg_priv;
- int ret;
if (!prg)
return 0;
- ret = clk_prepare_enable(prg->clk_axi);
- if (ret)
- goto fail_disable_ipg;
-
- return 0;
-
-fail_disable_ipg:
- clk_disable_unprepare(prg->clk_ipg);
-
- return ret;
+ return pm_runtime_get_sync(prg->dev);
}
EXPORT_SYMBOL_GPL(ipu_prg_enable);
@@ -164,7 +162,7 @@ void ipu_prg_disable(struct ipu_soc *ipu)
if (!prg)
return;
- clk_disable_unprepare(prg->clk_axi);
+ pm_runtime_put(prg->dev);
}
EXPORT_SYMBOL_GPL(ipu_prg_disable);
@@ -255,7 +253,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
if (!chan->enabled || prg_chan < 0)
return;
- clk_prepare_enable(prg->clk_ipg);
+ pm_runtime_get_sync(prg->dev);
val = readl(prg->regs + IPU_PRG_CTL);
val |= IPU_PRG_CTL_BYPASS(prg_chan);
@@ -264,7 +262,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
val = IPU_PRG_REG_UPDATE_REG_UPDATE;
writel(val, prg->regs + IPU_PRG_REG_UPDATE);
- clk_disable_unprepare(prg->clk_ipg);
+ pm_runtime_put(prg->dev);
ipu_prg_put_pre(prg, prg_chan);
@@ -275,7 +273,7 @@ EXPORT_SYMBOL_GPL(ipu_prg_channel_disable);
int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
unsigned int axi_id, unsigned int width,
unsigned int height, unsigned int stride,
- u32 format, unsigned long *eba)
+ u32 format, uint64_t modifier, unsigned long *eba)
{
int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
@@ -296,14 +294,10 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
return ret;
ipu_pre_configure(prg->pres[chan->used_pre],
- width, height, stride, format, *eba);
+ width, height, stride, format, modifier, *eba);
- ret = clk_prepare_enable(prg->clk_ipg);
- if (ret) {
- ipu_prg_put_pre(prg, prg_chan);
- return ret;
- }
+ pm_runtime_get_sync(prg->dev);
val = (stride - 1) & IPU_PRG_STRIDE_STRIDE_MASK;
writel(val, prg->regs + IPU_PRG_STRIDE(prg_chan));
@@ -336,7 +330,7 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
(val & IPU_PRG_STATUS_BUFFER1_READY(prg_chan)),
5, 1000);
- clk_disable_unprepare(prg->clk_ipg);
+ pm_runtime_put(prg->dev);
chan->enabled = true;
return 0;
@@ -384,6 +378,12 @@ static int ipu_prg_probe(struct platform_device *pdev)
if (ret)
return ret;
+ ret = clk_prepare_enable(prg->clk_axi);
+ if (ret) {
+ clk_disable_unprepare(prg->clk_ipg);
+ return ret;
+ }
+
/* init to free running mode */
val = readl(prg->regs + IPU_PRG_CTL);
val |= IPU_PRG_CTL_SHADOW_EN;
@@ -392,7 +392,8 @@ static int ipu_prg_probe(struct platform_device *pdev)
/* disable address threshold */
writel(0xffffffff, prg->regs + IPU_PRG_THD);
- clk_disable_unprepare(prg->clk_ipg);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
prg->dev = dev;
platform_set_drvdata(pdev, prg);
@@ -414,6 +415,40 @@ static int ipu_prg_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM
+static int prg_suspend(struct device *dev)
+{
+ struct ipu_prg *prg = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(prg->clk_axi);
+ clk_disable_unprepare(prg->clk_ipg);
+
+ return 0;
+}
+
+static int prg_resume(struct device *dev)
+{
+ struct ipu_prg *prg = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare_enable(prg->clk_ipg);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(prg->clk_axi);
+ if (ret) {
+ clk_disable_unprepare(prg->clk_ipg);
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops prg_pm_ops = {
+ SET_RUNTIME_PM_OPS(prg_suspend, prg_resume, NULL)
+};
+
static const struct of_device_id ipu_prg_dt_ids[] = {
{ .compatible = "fsl,imx6qp-prg", },
{ /* sentinel */ },
@@ -424,6 +459,7 @@ struct platform_driver ipu_prg_drv = {
.remove = ipu_prg_remove,
.driver = {
.name = "imx-ipu-prg",
+ .pm = &prg_pm_ops,
.of_match_table = ipu_prg_dt_ids,
},
};
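
The ipu-prg.c conversion replaces hand-rolled clk_prepare_enable()/clk_disable_unprepare() pairs with runtime PM: the clock handling moves into the suspend/resume callbacks wired up through SET_RUNTIME_PM_OPS(), probe marks the device active and enables runtime PM, and users simply take and drop a runtime PM reference. A minimal sketch of the same pattern for a hypothetical device:

    #include <linux/clk.h>
    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    /* Hypothetical device using the same runtime-PM clock pattern */
    struct foo {
            struct device *dev;
            struct clk *clk;
    };

    static int foo_runtime_suspend(struct device *dev)
    {
            struct foo *foo = dev_get_drvdata(dev);

            clk_disable_unprepare(foo->clk);        /* gate the clock while idle */
            return 0;
    }

    static int foo_runtime_resume(struct device *dev)
    {
            struct foo *foo = dev_get_drvdata(dev);

            return clk_prepare_enable(foo->clk);    /* ungate before register access */
    }

    static const struct dev_pm_ops foo_pm_ops = {
            SET_RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
    };

    static void foo_do_work(struct foo *foo)
    {
            pm_runtime_get_sync(foo->dev);  /* resumes (and clocks) the device */
            /* ... touch registers ... */
            pm_runtime_put(foo->dev);       /* may suspend again when unused */
    }
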
diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h
index ac4b8d658500..d6beee99b6b8 100644
--- a/drivers/gpu/ipu-v3/ipu-prv.h
+++ b/drivers/gpu/ipu-v3/ipu-prv.h
@@ -269,8 +269,8 @@ int ipu_pre_get(struct ipu_pre *pre);
void ipu_pre_put(struct ipu_pre *pre);
u32 ipu_pre_get_baddr(struct ipu_pre *pre);
void ipu_pre_configure(struct ipu_pre *pre, unsigned int width,
- unsigned int height,
- unsigned int stride, u32 format, unsigned int bufaddr);
+ unsigned int height, unsigned int stride, u32 format,
+ uint64_t modifier, unsigned int bufaddr);
void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr);
struct ipu_prg *ipu_prg_lookup_by_phandle(struct device *dev, const char *name,
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 5afd6e364fb6..1c27526c499e 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -189,12 +189,40 @@ struct drm_private_state_funcs {
struct drm_private_state *state);
};
+/**
+ * struct drm_private_obj - base struct for driver private atomic object
+ *
+ * A driver private object is initialized by calling
+ * drm_atomic_private_obj_init() and cleaned up by calling
+ * drm_atomic_private_obj_fini().
+ *
+ * Currently only tracks the state update functions and the opaque driver
+ * private state itself, but in the future might also track which
+ * &drm_modeset_lock is required to duplicate and update this object's state.
+ */
struct drm_private_obj {
+ /**
+ * @state: Current atomic state for this driver private object.
+ */
struct drm_private_state *state;
+ /**
+ * @funcs:
+ *
+ * Functions to manipulate the state of this driver private object, see
+ * &drm_private_state_funcs.
+ */
const struct drm_private_state_funcs *funcs;
};
+/**
+ * struct drm_private_state - base struct for driver private object state
+ * @state: backpointer to global drm_atomic_state
+ *
+ * Currently only contains a backpointer to the overall atomic update, but in
+ * the future also might hold synchronization information similar to e.g.
+ * &drm_crtc.commit.
+ */
struct drm_private_state {
struct drm_atomic_state *state;
};
@@ -218,6 +246,10 @@ struct __drm_private_objs_state {
* @num_private_objs: size of the @private_objs array
* @private_objs: pointer to array of private object pointers
* @acquire_ctx: acquire context for this atomic modeset state update
+ *
+ * States are added to an atomic update by calling drm_atomic_get_crtc_state(),
+ * drm_atomic_get_plane_state(), drm_atomic_get_connector_state(), or for
+ * private state structures, drm_atomic_get_private_obj_state().
*/
struct drm_atomic_state {
struct kref ref;
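
The new kerneldoc describes the private-object pattern: a driver embeds struct drm_private_state at the start of its own state struct and supplies duplicate/destroy hooks through drm_private_state_funcs. A hedged sketch of the wiring, with hypothetical driver names; registration would then go through drm_atomic_private_obj_init(), and the core fills in the drm_private_state.state backpointer when the object joins an atomic update:

    #include <drm/drm_atomic.h>
    #include <linux/slab.h>

    /* Hypothetical driver state embedding drm_private_state as its base */
    struct foo_state {
            struct drm_private_state base;
            unsigned int active_lanes;      /* driver-specific payload */
    };

    #define to_foo_state(s) container_of(s, struct foo_state, base)

    static struct drm_private_state *
    foo_duplicate_state(struct drm_private_obj *obj)
    {
            struct foo_state *state;

            /* copy the whole subclass; the core resets base.state itself */
            state = kmemdup(to_foo_state(obj->state), sizeof(*state), GFP_KERNEL);
            return state ? &state->base : NULL;
    }

    static void foo_destroy_state(struct drm_private_obj *obj,
                                  struct drm_private_state *state)
    {
            kfree(to_foo_state(state));
    }

    static const struct drm_private_state_funcs foo_state_funcs = {
            .atomic_duplicate_state = foo_duplicate_state,
            .atomic_destroy_state = foo_destroy_state,
    };
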
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 1494b12a984a..b069433e7fc1 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -33,6 +33,7 @@
struct drm_fb_helper;
#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
#include <linux/kgdb.h>
enum mode_set_atomic {
@@ -275,6 +276,7 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper);
void drm_fb_helper_deferred_io(struct fb_info *info,
struct list_head *pagelist);
+int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper);
ssize_t drm_fb_helper_sys_read(struct fb_info *info, char __user *buf,
size_t count, loff_t *ppos);
@@ -319,6 +321,13 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_
int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
struct drm_connector *connector);
+int drm_fb_helper_fbdev_setup(struct drm_device *dev,
+ struct drm_fb_helper *fb_helper,
+ const struct drm_fb_helper_funcs *funcs,
+ unsigned int preferred_bpp,
+ unsigned int max_conn_count);
+void drm_fb_helper_fbdev_teardown(struct drm_device *dev);
+
void drm_fb_helper_lastclose(struct drm_device *dev);
void drm_fb_helper_output_poll_changed(struct drm_device *dev);
#else
@@ -332,11 +341,17 @@ static inline int drm_fb_helper_init(struct drm_device *dev,
struct drm_fb_helper *helper,
int max_conn)
{
+ /* So drivers can use it to free the struct */
+ helper->dev = dev;
+ dev->fb_helper = helper;
+
return 0;
}
static inline void drm_fb_helper_fini(struct drm_fb_helper *helper)
{
+ if (helper && helper->dev)
+ helper->dev->fb_helper = NULL;
}
static inline int drm_fb_helper_blank(int blank, struct fb_info *info)
@@ -409,6 +424,11 @@ static inline void drm_fb_helper_deferred_io(struct fb_info *info,
{
}
+static inline int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
+{
+ return -ENODEV;
+}
+
static inline ssize_t drm_fb_helper_sys_read(struct fb_info *info,
char __user *buf, size_t count,
loff_t *ppos)
@@ -518,6 +538,24 @@ drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
return 0;
}
+static inline int
+drm_fb_helper_fbdev_setup(struct drm_device *dev,
+ struct drm_fb_helper *fb_helper,
+ const struct drm_fb_helper_funcs *funcs,
+ unsigned int preferred_bpp,
+ unsigned int max_conn_count)
+{
+ /* So drivers can use it to free the struct */
+ dev->fb_helper = fb_helper;
+
+ return 0;
+}
+
+static inline void drm_fb_helper_fbdev_teardown(struct drm_device *dev)
+{
+ dev->fb_helper = NULL;
+}
+
static inline void drm_fb_helper_lastclose(struct drm_device *dev)
{
}
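
drm_fb_helper_fbdev_setup()/teardown() bundle the usual init, add-connectors and initial-config sequence into one call pair, and the fbdev-emulation-disabled stubs keep dev->fb_helper consistent so teardown stays safe either way. A rough sketch of a driver using the pair; the helper funcs (and the .fb_probe a real driver must provide) are hypothetical, and error handling is trimmed:

    #include <drm/drm_fb_helper.h>
    #include <linux/slab.h>

    /* Hypothetical fbdev glue; a real driver must at least fill .fb_probe */
    static const struct drm_fb_helper_funcs foo_fb_helper_funcs = {
            /* .fb_probe = foo_fb_probe, */
    };

    static int foo_fbdev_init(struct drm_device *drm)
    {
            struct drm_fb_helper *helper;

            helper = kzalloc(sizeof(*helper), GFP_KERNEL);
            if (!helper)
                    return -ENOMEM;

            /* 32 bpp preferred, up to 4 cloned connectors; the helper is
             * stored in drm->fb_helper so the teardown path can find it */
            return drm_fb_helper_fbdev_setup(drm, helper, &foo_fb_helper_funcs,
                                             32, 4);
    }

    static void foo_fbdev_fini(struct drm_device *drm)
    {
            drm_fb_helper_fbdev_teardown(drm);      /* works off drm->fb_helper */
    }
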
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index dccb897951ba..c50502c656e5 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -121,6 +121,12 @@ struct drm_framebuffer {
* @base: base modeset object structure, contains the reference count.
*/
struct drm_mode_object base;
+
+ /**
+ * @comm: Name of the process allocating the fb, used for fb dumping.
+ */
+ char comm[TASK_COMM_LEN];
+
/**
* @format: framebuffer format information
*/
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index c6639e8e5007..2cb6f02df64a 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -269,6 +269,9 @@ struct drm_mode_config_funcs {
* state easily. If this hook is implemented, drivers must also
* implement @atomic_state_clear and @atomic_state_free.
*
+ * Subclassing of &drm_atomic_state is deprecated in favour of using
+ * &drm_private_state and &drm_private_obj.
+ *
* RETURNS:
*
* A new &drm_atomic_state on success or NULL on failure.
@@ -290,6 +293,9 @@ struct drm_mode_config_funcs {
*
* Drivers that implement this must call drm_atomic_state_default_clear()
* to clear common state.
+ *
+ * Subclassing of &drm_atomic_state is deprecated in favour of using
+ * &drm_private_state and &drm_private_obj.
*/
void (*atomic_state_clear)(struct drm_atomic_state *state);
@@ -302,6 +308,9 @@ struct drm_mode_config_funcs {
*
* Drivers that implement this must call
* drm_atomic_state_default_release() to release common resources.
+ *
+ * Subclassing of &drm_atomic_state is deprecated in favour of using
+ * &drm_private_state and &drm_private_obj.
*/
void (*atomic_state_free)(struct drm_atomic_state *state);
};
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 5f9932e2246e..2a4a42e59a47 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -80,13 +80,13 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
__printf(2, 3)
void drm_printf(struct drm_printer *p, const char *f, ...);
+__printf(2, 0)
/**
* drm_vprintf - print to a &drm_printer stream
* @p: the &drm_printer
* @fmt: format string
* @va: the va_list
*/
-__printf(2, 0)
static inline void
drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va)
{
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 9e8ba90c6784..3980602472c0 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -33,36 +33,31 @@ struct drm_syncobj_cb;
/**
* struct drm_syncobj - sync object.
*
- * This structure defines a generic sync object which wraps a dma fence.
+ * This structure defines a generic sync object which wraps a &dma_fence.
*/
struct drm_syncobj {
/**
- * @refcount:
- *
- * Reference count of this object.
+ * @refcount: Reference count of this object.
*/
struct kref refcount;
/**
* @fence:
* NULL or a pointer to the fence bound to this object.
*
- * This field should not be used directly. Use drm_syncobj_fence_get
- * and drm_syncobj_replace_fence instead.
+ * This field should not be used directly. Use drm_syncobj_fence_get()
+ * and drm_syncobj_replace_fence() instead.
*/
struct dma_fence __rcu *fence;
/**
- * @cb_list:
- * List of callbacks to call when the fence gets replaced
+ * @cb_list: List of callbacks to call when the &fence gets replaced.
*/
struct list_head cb_list;
/**
- * @lock:
- * locks cb_list and write-locks fence.
+ * @lock: Protects &cb_list and write-locks &fence.
*/
spinlock_t lock;
/**
- * @file:
- * a file backing for this syncobj.
+ * @file: A file backing for this syncobj.
*/
struct file *file;
};
@@ -73,7 +68,7 @@ typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
/**
* struct drm_syncobj_cb - callback for drm_syncobj_add_callback
* @node: used by drm_syncobj_add_callback to append this struct to
- * syncobj::cb_list
+ * &drm_syncobj.cb_list
* @func: drm_syncobj_func_t to call
*
* This struct will be initialized by drm_syncobj_add_callback, additional
@@ -92,7 +87,7 @@ void drm_syncobj_free(struct kref *kref);
* drm_syncobj_get - acquire a syncobj reference
* @obj: sync object
*
- * This acquires additional reference to @obj. It is illegal to call this
+ * This acquires an additional reference to @obj. It is illegal to call this
* without already holding a reference. No locks required.
*/
static inline void
@@ -111,6 +106,17 @@ drm_syncobj_put(struct drm_syncobj *obj)
kref_put(&obj->refcount, drm_syncobj_free);
}
+/**
+ * drm_syncobj_fence_get - get a reference to a fence in a sync object
+ * @syncobj: sync object.
+ *
+ * This acquires an additional reference to &drm_syncobj.fence contained in @syncobj,
+ * if not NULL. It is illegal to call this without already holding a reference.
+ * No locks required.
+ *
+ * Returns:
+ * Either the fence of @obj or NULL if there's none.
+ */
static inline struct dma_fence *
drm_syncobj_fence_get(struct drm_syncobj *syncobj)
{
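
The tightened kerneldoc pins down the access contract: never touch drm_syncobj.fence directly, always go through drm_syncobj_fence_get()/drm_syncobj_replace_fence(), and hold a syncobj reference across any use. A small consumer sketch under those rules, with a hypothetical function name:

    #include <drm/drm_syncobj.h>
    #include <linux/dma-fence.h>

    /* Hypothetical consumer; the caller must already hold a syncobj ref */
    static long foo_wait_syncobj(struct drm_syncobj *syncobj)
    {
            struct dma_fence *fence = drm_syncobj_fence_get(syncobj);
            long ret;

            if (!fence)
                    return -EINVAL; /* no fence bound to the syncobj yet */

            ret = dma_fence_wait(fence, true);      /* interruptible */
            dma_fence_put(fence);   /* drop the reference taken by the get */
            return ret;
    }
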
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 972a25633525..5db0458dd832 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -373,24 +373,46 @@
/* CFL S */
#define INTEL_CFL_S_GT1_IDS(info) \
INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \
- INTEL_VGA_DEVICE(0x3E93, info) /* SRV GT1 */
+ INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \
+ INTEL_VGA_DEVICE(0x3E99, info) /* SRV GT1 */
#define INTEL_CFL_S_GT2_IDS(info) \
INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \
INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \
- INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */
+ INTEL_VGA_DEVICE(0x3E96, info), /* SRV GT2 */ \
+ INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */
/* CFL H */
#define INTEL_CFL_H_GT2_IDS(info) \
INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \
INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */
-/* CFL U */
+/* CFL U GT1 */
+#define INTEL_CFL_U_GT1_IDS(info) \
+ INTEL_VGA_DEVICE(0x3EA1, info), \
+ INTEL_VGA_DEVICE(0x3EA4, info)
+
+/* CFL U GT2 */
+#define INTEL_CFL_U_GT2_IDS(info) \
+ INTEL_VGA_DEVICE(0x3EA0, info), \
+ INTEL_VGA_DEVICE(0x3EA3, info), \
+ INTEL_VGA_DEVICE(0x3EA9, info)
+
+/* CFL U GT3 */
#define INTEL_CFL_U_GT3_IDS(info) \
+ INTEL_VGA_DEVICE(0x3EA2, info), /* ULT GT3 */ \
+ INTEL_VGA_DEVICE(0x3EA5, info), /* ULT GT3 */ \
INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \
INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \
- INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \
- INTEL_VGA_DEVICE(0x3EA5, info) /* ULT GT3 */
+ INTEL_VGA_DEVICE(0x3EA8, info) /* ULT GT3 */
+
+#define INTEL_CFL_IDS(info) \
+ INTEL_CFL_S_GT1_IDS(info), \
+ INTEL_CFL_S_GT2_IDS(info), \
+ INTEL_CFL_H_GT2_IDS(info), \
+ INTEL_CFL_U_GT1_IDS(info), \
+ INTEL_CFL_U_GT2_IDS(info), \
+ INTEL_CFL_U_GT3_IDS(info)
/* CNL U 2+2 */
#define INTEL_CNL_U_GT2_IDS(info) \
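
Besides the new GT1/GT2 ULT IDs, the INTEL_CFL_IDS() umbrella lets consumers pick up every Coffee Lake variant with a single macro. Illustrative use in a PCI device table; the device-info type and its instance are hypothetical stand-ins for whatever the consumer stores in driver_data:

    #include <drm/i915_pciids.h>
    #include <linux/module.h>
    #include <linux/pci.h>

    struct foo_device_info { int gen; };                    /* hypothetical */
    static const struct foo_device_info cfl_info = { .gen = 9 };

    static const struct pci_device_id foo_pci_ids[] = {
            INTEL_CFL_IDS(&cfl_info),       /* expands to every CFL S/H/U entry */
            { /* sentinel */ }
    };
    MODULE_DEVICE_TABLE(pci, foo_pci_ids);
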
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index d01087b2a651..4a54305120e0 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -135,172 +135,6 @@ struct drm_exynos_g2d_exec {
__u64 async;
};
-enum drm_exynos_ops_id {
- EXYNOS_DRM_OPS_SRC,
- EXYNOS_DRM_OPS_DST,
- EXYNOS_DRM_OPS_MAX,
-};
-
-struct drm_exynos_sz {
- __u32 hsize;
- __u32 vsize;
-};
-
-struct drm_exynos_pos {
- __u32 x;
- __u32 y;
- __u32 w;
- __u32 h;
-};
-
-enum drm_exynos_flip {
- EXYNOS_DRM_FLIP_NONE = (0 << 0),
- EXYNOS_DRM_FLIP_VERTICAL = (1 << 0),
- EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1),
- EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL |
- EXYNOS_DRM_FLIP_HORIZONTAL,
-};
-
-enum drm_exynos_degree {
- EXYNOS_DRM_DEGREE_0,
- EXYNOS_DRM_DEGREE_90,
- EXYNOS_DRM_DEGREE_180,
- EXYNOS_DRM_DEGREE_270,
-};
-
-enum drm_exynos_planer {
- EXYNOS_DRM_PLANAR_Y,
- EXYNOS_DRM_PLANAR_CB,
- EXYNOS_DRM_PLANAR_CR,
- EXYNOS_DRM_PLANAR_MAX,
-};
-
-/**
- * A structure for ipp supported property list.
- *
- * @version: version of this structure.
- * @ipp_id: id of ipp driver.
- * @count: count of ipp driver.
- * @writeback: flag of writeback supporting.
- * @flip: flag of flip supporting.
- * @degree: flag of degree information.
- * @csc: flag of csc supporting.
- * @crop: flag of crop supporting.
- * @scale: flag of scale supporting.
- * @refresh_min: min hz of refresh.
- * @refresh_max: max hz of refresh.
- * @crop_min: crop min resolution.
- * @crop_max: crop max resolution.
- * @scale_min: scale min resolution.
- * @scale_max: scale max resolution.
- */
-struct drm_exynos_ipp_prop_list {
- __u32 version;
- __u32 ipp_id;
- __u32 count;
- __u32 writeback;
- __u32 flip;
- __u32 degree;
- __u32 csc;
- __u32 crop;
- __u32 scale;
- __u32 refresh_min;
- __u32 refresh_max;
- __u32 reserved;
- struct drm_exynos_sz crop_min;
- struct drm_exynos_sz crop_max;
- struct drm_exynos_sz scale_min;
- struct drm_exynos_sz scale_max;
-};
-
-/**
- * A structure for ipp config.
- *
- * @ops_id: property of operation directions.
- * @flip: property of mirror, flip.
- * @degree: property of rotation degree.
- * @fmt: property of image format.
- * @sz: property of image size.
- * @pos: property of image position(src-cropped,dst-scaler).
- */
-struct drm_exynos_ipp_config {
- __u32 ops_id;
- __u32 flip;
- __u32 degree;
- __u32 fmt;
- struct drm_exynos_sz sz;
- struct drm_exynos_pos pos;
-};
-
-enum drm_exynos_ipp_cmd {
- IPP_CMD_NONE,
- IPP_CMD_M2M,
- IPP_CMD_WB,
- IPP_CMD_OUTPUT,
- IPP_CMD_MAX,
-};
-
-/**
- * A structure for ipp property.
- *
- * @config: source, destination config.
- * @cmd: definition of command.
- * @ipp_id: id of ipp driver.
- * @prop_id: id of property.
- * @refresh_rate: refresh rate.
- */
-struct drm_exynos_ipp_property {
- struct drm_exynos_ipp_config config[EXYNOS_DRM_OPS_MAX];
- __u32 cmd;
- __u32 ipp_id;
- __u32 prop_id;
- __u32 refresh_rate;
-};
-
-enum drm_exynos_ipp_buf_type {
- IPP_BUF_ENQUEUE,
- IPP_BUF_DEQUEUE,
-};
-
-/**
- * A structure for ipp buffer operations.
- *
- * @ops_id: operation directions.
- * @buf_type: definition of buffer.
- * @prop_id: id of property.
- * @buf_id: id of buffer.
- * @handle: Y, Cb, Cr each planar handle.
- * @user_data: user data.
- */
-struct drm_exynos_ipp_queue_buf {
- __u32 ops_id;
- __u32 buf_type;
- __u32 prop_id;
- __u32 buf_id;
- __u32 handle[EXYNOS_DRM_PLANAR_MAX];
- __u32 reserved;
- __u64 user_data;
-};
-
-enum drm_exynos_ipp_ctrl {
- IPP_CTRL_PLAY,
- IPP_CTRL_STOP,
- IPP_CTRL_PAUSE,
- IPP_CTRL_RESUME,
- IPP_CTRL_MAX,
-};
-
-/**
- * A structure for ipp start/stop operations.
- *
- * @prop_id: id of property.
- * @ctrl: definition of control.
- */
-struct drm_exynos_ipp_cmd_ctrl {
- __u32 prop_id;
- __u32 ctrl;
-};
-
#define DRM_EXYNOS_GEM_CREATE 0x00
#define DRM_EXYNOS_GEM_MAP 0x01
/* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
@@ -312,11 +146,7 @@ struct drm_exynos_ipp_cmd_ctrl {
#define DRM_EXYNOS_G2D_SET_CMDLIST 0x21
#define DRM_EXYNOS_G2D_EXEC 0x22
-/* IPP - Image Post Processing */
-#define DRM_EXYNOS_IPP_GET_PROPERTY 0x30
-#define DRM_EXYNOS_IPP_SET_PROPERTY 0x31
-#define DRM_EXYNOS_IPP_QUEUE_BUF 0x32
-#define DRM_EXYNOS_IPP_CMD_CTRL 0x33
+/* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */
#define DRM_IOCTL_EXYNOS_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \
DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
@@ -335,18 +165,8 @@ struct drm_exynos_ipp_cmd_ctrl {
#define DRM_IOCTL_EXYNOS_G2D_EXEC DRM_IOWR(DRM_COMMAND_BASE + \
DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec)
-#define DRM_IOCTL_EXYNOS_IPP_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \
- DRM_EXYNOS_IPP_GET_PROPERTY, struct drm_exynos_ipp_prop_list)
-#define DRM_IOCTL_EXYNOS_IPP_SET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + \
- DRM_EXYNOS_IPP_SET_PROPERTY, struct drm_exynos_ipp_property)
-#define DRM_IOCTL_EXYNOS_IPP_QUEUE_BUF DRM_IOWR(DRM_COMMAND_BASE + \
- DRM_EXYNOS_IPP_QUEUE_BUF, struct drm_exynos_ipp_queue_buf)
-#define DRM_IOCTL_EXYNOS_IPP_CMD_CTRL DRM_IOWR(DRM_COMMAND_BASE + \
- DRM_EXYNOS_IPP_CMD_CTRL, struct drm_exynos_ipp_cmd_ctrl)
-
/* EXYNOS specific events */
#define DRM_EXYNOS_G2D_EVENT 0x80000000
-#define DRM_EXYNOS_IPP_EVENT 0x80000001
struct drm_exynos_g2d_event {
struct drm_event base;
@@ -357,16 +177,6 @@ struct drm_exynos_g2d_event {
__u32 reserved;
};
-struct drm_exynos_ipp_event {
- struct drm_event base;
- __u64 user_data;
- __u32 tv_sec;
- __u32 tv_usec;
- __u32 prop_id;
- __u32 reserved;
- __u32 buf_id[EXYNOS_DRM_OPS_MAX];
-};
-
#if defined(__cplusplus)
}
#endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 6e80501368ae..f4cab5b3ba9a 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args {
__u64 eop_buffer_address; /* to KFD */
__u64 eop_buffer_size; /* to KFD */
__u64 ctx_save_restore_address; /* to KFD */
- __u64 ctx_save_restore_size; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
};
struct kfd_ioctl_destroy_queue_args {
@@ -261,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args {
*/
};
+struct kfd_ioctl_set_trap_handler_args {
+ uint64_t tba_addr; /* to KFD */
+ uint64_t tma_addr; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -321,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args {
#define AMDKFD_IOC_GET_TILE_CONFIG \
AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
+#define AMDKFD_IOC_SET_TRAP_HANDLER \
+ AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x13
+#define AMDKFD_COMMAND_END 0x14
#endif
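
The new AMDKFD_IOC_SET_TRAP_HANDLER ioctl (command 0x13, hence AMDKFD_COMMAND_END moving to 0x14) takes the trap handler base (TBA) and trap memory (TMA) addresses plus a GPU id. From userspace the call would look roughly like the following; the wrapper name and addresses are hypothetical and error handling is trimmed:

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kfd_ioctl.h>

    /* Hypothetical wrapper around the new ioctl */
    static int set_trap_handler(uint64_t tba, uint64_t tma, uint32_t gpu_id)
    {
            struct kfd_ioctl_set_trap_handler_args args = {
                    .tba_addr = tba,        /* trap handler base address */
                    .tma_addr = tma,        /* trap memory address */
                    .gpu_id = gpu_id,
            };
            int fd = open("/dev/kfd", O_RDWR);
            int ret;

            if (fd < 0)
                    return -1;
            ret = ioctl(fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
            close(fd);
            return ret;
    }
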
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index ce4c07688b13..abbad94e14a1 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -344,7 +344,7 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan);
int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
unsigned int axi_id, unsigned int width,
unsigned int height, unsigned int stride,
- u32 format, unsigned long *eba);
+ u32 format, uint64_t modifier, unsigned long *eba);
/*
* IPU CMOS Sensor Interface (csi) functions