Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1043
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 572
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 111
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile | 10
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_regs.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 560
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 274
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 1214
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 131
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 114
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 84
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 65
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 119
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 92
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 39
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 340
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 319
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 392
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 583
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 112
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/soc15_int.h | 47
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 26
-rw-r--r--  drivers/gpu/drm/amd/include/v9_structs.h | 48
-rw-r--r--  drivers/gpu/drm/bridge/dumb-vga-dac.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/Kconfig | 18
-rw-r--r--  drivers/gpu/drm/exynos/Makefile | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.c | 35
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.h | 10
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_dsi.c | 40
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimc.c | 1080
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimc.h | 23
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimd.c | 8
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gem.c | 21
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gem.h | 3
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gsc.c | 1075
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gsc.h | 24
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.c | 916
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.h | 175
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_rotator.c | 758
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_scaler.c | 694
-rw-r--r--  drivers/gpu/drm/exynos/exynos_hdmi.c | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_mixer.c | 22
-rw-r--r--  drivers/gpu/drm/exynos/regs-mixer.h | 1
-rw-r--r--  drivers/gpu/drm/exynos/regs-scaler.h | 426
-rw-r--r--  drivers/gpu/drm/i915/intel_csr.c | 1
-rw-r--r--  drivers/gpu/drm/mediatek/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dpi.c | 60
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_drm_gem.c | 2
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dsi.c | 14
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 26
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 3
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_drv.c | 51
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_drv.h | 4
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.c | 16
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.h | 2
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_kms.c | 25
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_of.c | 1
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_regs.h | 16
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 10
-rw-r--r--  drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2
-rw-r--r--  drivers/gpu/drm/vc4/vc4_crtc.c | 46
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 31
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 14
91 files changed, 9612 insertions, 3128 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b5154d52..f3002020df6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -130,7 +130,8 @@ amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
- amdgpu_amdkfd_gfx_v8.o
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o
# add cgs
amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203ffb11..cd0e8f192e6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+ break;
default:
dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
return;
@@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
+ if (adev->asic_type >= CHIP_VEGA10) {
+ /* On SOC15 the BIF is involved in routing
+ * doorbells using the low 12 bits of the
+ * address. Communicate the assignments to
+ * KFD. KFD uses two doorbell pages per
+ * process in case of 64-bit doorbells so we
+ * can use each doorbell assignment twice.
+ */
+ gpu_resources.sdma_doorbell[0][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
+ gpu_resources.sdma_doorbell[0][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+ gpu_resources.sdma_doorbell[1][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
+ gpu_resources.sdma_doorbell[1][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+ /* Doorbells 0x0f0-0x0ff and 0x2f0-0x2ff are reserved for
+ * SDMA, IH and VCN. So don't use them for the CP.
+ */
+ gpu_resources.reserved_doorbell_mask = 0x1f0;
+ gpu_resources.reserved_doorbell_val = 0x0f0;
+ }
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
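
The reserved-doorbell encoding communicated above is a simple mask/value test on the low doorbell-index bits. A minimal illustrative helper (hypothetical, not part of this patch) would be:

static inline bool doorbell_index_is_reserved(unsigned int index,
					      unsigned int mask,
					      unsigned int val)
{
	/* With mask 0x1f0 and val 0x0f0 this matches indices 0x0f0-0x0ff
	 * in every 0x200-aligned doorbell block, e.g. 0x0f0-0x0ff and
	 * 0x2f0-0x2ff, which are reserved for SDMA, IH and VCN.
	 */
	return (index & mask) == val;
}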
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea731e0..12367a9951e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
+#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
+ atomic_t invalid;
struct amdkfd_process_info *process_info;
+ struct page **user_pages;
struct amdgpu_sync sync;
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ atomic_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
};
int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
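
The new MMU-notifier fields added to struct amdkfd_process_info (evicted_bos, restore_userptr_work, pid) support an evict-then-restore flow: each eviction bumps a counter and schedules a delayed worker that revalidates userptr BOs once the VM changes settle. A rough sketch of an eviction path using these fields, hypothetical code rather than an excerpt from this patch:

static void example_evict_userptr(struct amdkfd_process_info *info)
{
	/* On the first outstanding eviction the real code also stops the
	 * process's user mode queues (not shown here).
	 */
	if (atomic_inc_return(&info->evicted_bos) == 1)
		pr_debug("first eviction, queues must be stopped\n");

	/* Batch consecutive VM changes before restoring */
	schedule_delayed_work(&info->restore_userptr_work,
			      msecs_to_jiffies(1));
}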
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53172b9..0ff36d45a597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9a5e9f..6ef9762b4b00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000000..8f37991df61b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related register with the same
+ * names but different offsets. Define the MMHUB register we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
+
+#define V9_PIPE_PER_MEC (4)
+#define V9_QUEUES_PER_PIPE_MEC (8)
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because REG_GET_FIELD() is used, this function lives in the
+ * ASIC-specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_local_mem_info = get_local_mem_info,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .alloc_pasid = amdgpu_pasid_alloc,
+ .free_pasid = amdgpu_pasid_free,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id) & 31;
+
+ return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * We need to do this twice, once for GFX and once for MMHUB.
+ * For the ATC, add 16 to the VMID for MMHUB; the IH block uses
+ * different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
+
+/* TODO: the RING0 form of these fields is obsolete; it seems to date
+ * back to SI but still works.
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+ };
+ uint32_t retval;
+
+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+ mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* The HIQ is set up during driver init with vmid set to 0 */
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ data = RREG32(sdmax_gfx_context_cntl);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(sdmax_gfx_context_cntl, data);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ uint32_t req = (1 << vmid) |
+ (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to an MMHUB power-gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation; requires a kfd2kgd
+ * interface change
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+ req);
+
+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ mutex_unlock(&adev->srbm_mutex);
+
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs packet */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ if (ring->ready)
+ return invalidate_tlbs_with_kiq(adev, pasid);
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+ == pasid) {
+ write_vmid_invalidate_request(kgd, vmid);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return 0;
+ }
+
+ write_vmid_invalidate_request(kgd, vmid);
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ /* No longer needed on GFXv9. The scratch base address is
+ * passed to the shader by the CP. It's the user mode driver's
+ * responsibility.
+ */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bits in use */
+ return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+ AMDGPU_PTE_VALID;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* TODO: take advantage of per-process address space size. For
+ * now, all processes share the same address space size, like
+ * on GFX8 and older.
+ */
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
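
The trickiest part of the new GFXv9 file is the 64-bit WPTR guess in kgd_hqd_load(). Restated as a standalone helper for clarity (illustrative only, not part of the patch):

/* Rebuild a 64-bit write pointer from the 32-bit RPTR and the WPTR
 * saved in the MQD, assuming the queue did not overflow: the low bits
 * (within the power-of-two queue_size) come from the RPTR, the upper
 * bits from the saved WPTR, plus one queue_size if the WPTR wrapped.
 */
static uint64_t guess_wptr64(uint32_t rptr, uint64_t saved_wptr,
			     uint32_t queue_size)
{
	uint64_t wptr = rptr & (queue_size - 1);

	if ((saved_wptr & (queue_size - 1)) < wptr)
		wptr += queue_size;
	wptr += saved_wptr & ~(uint64_t)(queue_size - 1);

	return wptr;
}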
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e1479da38..5296e24fd662 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
+#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -33,10 +34,20 @@
*/
#define VI_BO_SIZE_ALIGN (0x8000)
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
+ uint64_t max_userptr_mem_limit;
int64_t system_mem_used;
+ int64_t userptr_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = {
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
/* Set memory usage limits. Currently, the limits are
* System (kernel) memory - 3/8th System RAM
+ * Userptr memory - 3/4th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
- pr_debug("Kernel memory limit %lluM\n",
- (kfd_mem_limit.max_system_mem_limit >> 20));
+ kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+ pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_userptr_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ if ((kfd_mem_limit.system_mem_used + acc_size >
+ kfd_mem_limit.max_system_mem_limit) ||
+ (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+ kfd_mem_limit.max_userptr_mem_limit)) {
+ ret = -ENOMEM;
+ goto err_no_mem;
+ }
+ kfd_mem_limit.system_mem_used += acc_size;
+ kfd_mem_limit.userptr_mem_used += size;
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT)
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.userptr_mem_used -= size;
+ }
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+ kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+ kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
- struct amdkfd_process_info *process_info)
+ struct amdkfd_process_info *process_info,
+ bool userptr)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +552,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ if (userptr)
+ list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ else
+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+ uint64_t user_addr)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_mn_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ /* If no restore worker is running concurrently, user_pages
+ * should not be allocated
+ */
+ WARN(mem->user_pages, "Leaking user_pages array");
+
+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n", __func__);
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ if (ret) {
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto free_out;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ if (ret)
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+unregister_out:
+ if (ret)
+ amdgpu_mn_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
@@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+ bool no_update_pte)
{
int ret;
@@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret;
}
+ if (no_update_pte)
+ return 0;
+
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
goto create_evict_fence_fail;
}
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ atomic_set(&info->evicted_bos, 0);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -872,6 +1005,7 @@ reserve_pd_fail:
dma_fence_put(*ef);
*ef = NULL;
*process_info = NULL;
+ put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
@@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
@@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ uint64_t user_addr = 0;
struct amdgpu_bo *bo;
int byte_align;
- u32 alloc_domain;
+ u32 domain, alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
@@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
- alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
- alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = *offset;
} else {
return -EINVAL;
}
@@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
(*mem)->va = va;
- (*mem)->domain = alloc_domain;
+ (*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
- add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ ret = init_user_pages(*mem, current->mm, user_addr);
+ if (ret) {
+ mutex_lock(&avm->process_info->lock);
+ list_del(&(*mem)->validate_list.head);
+ mutex_unlock(&avm->process_info->lock);
+ goto allocate_init_user_pages_failed;
+ }
+ }
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
+allocate_init_user_pages_failed:
+ amdgpu_bo_unref(&bo);
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_system_mem;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* be freed anyway
*/
+ /* No more MMU notifiers */
+ amdgpu_mn_unregister(mem->bo);
+
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
+ /* Free user pages if necessary */
+ if (mem->user_pages) {
+ pr_debug("%s: Freeing user_pages array\n", __func__);
+ if (mem->user_pages[0])
+ release_pages(mem->user_pages,
+ mem->bo->tbo.ttm->num_pages);
+ kvfree(mem->user_pages);
+ }
+
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
-
- /* Make sure restore is not running concurrently.
- */
- mutex_lock(&mem->process_info->lock);
-
- mutex_lock(&mem->lock);
+ bool is_invalid_userptr = false;
bo = mem->bo;
-
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ down_write(&current->mm->mmap_sem);
+ is_invalid_userptr = atomic_read(&mem->invalid);
+ up_write(&current->mm->mmap_sem);
+ }
+
+ mutex_lock(&mem->lock);
+
domain = mem->domain;
bo_size = bo->tbo.mem.size;
@@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out;
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
if (check_if_add_bo_to_vm(avm, mem)) {
ret = add_bo_to_vm(adev, mem, avm, false,
&bo_va_entry);
@@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto add_bo_to_vm_failed;
}
- if (mem->mapped_to_gpu_memory == 0) {
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
@@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size,
entry);
- ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1613,337 @@ bo_reserve_failed:
return ret;
}
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+ struct mm_struct *mm)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int invalid, evicted_bos;
+ int r = 0;
+
+ invalid = atomic_inc_return(&mem->invalid);
+ evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+ if (evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd->quiesce_mm(mm);
+ if (r)
+ pr_err("Failed to quiesce KFD\n");
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+
+ return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int invalid, ret;
+
+ /* Move all invalidated BOs to the userptr_inval_list and
+ * release their user pages by migration to the CPU domain
+ */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list.head) {
+ if (!atomic_read(&mem->invalid))
+ continue; /* BO is still valid */
+
+ bo = mem->bo;
+
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_inval_list);
+ }
+
+ if (list_empty(&process_info->userptr_inval_list))
+ return 0; /* All evicted userptr BOs were freed */
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ invalid = atomic_read(&mem->invalid);
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its BO list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ if (!mem->user_pages) {
+ mem->user_pages =
+ kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n",
+ __func__);
+ return -ENOMEM;
+ }
+ } else if (mem->user_pages[0]) {
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+ }
+
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ if (ret) {
+ mem->user_pages[0] = NULL;
+ pr_info("%s: Failed to get user pages: %d\n",
+ __func__, ret);
+ /* Pretend it succeeded. It will fail later
+ * with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with
+ * stalled user mode queues.
+ */
+ }
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently
+ */
+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_bo_list_entry *pd_bo_list_entries;
+ struct list_head resv_list, duplicates;
+ struct ww_acquire_ctx ticket;
+ struct amdgpu_sync sync;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int i, ret;
+
+ pd_bo_list_entries = kcalloc(process_info->n_vms,
+ sizeof(struct amdgpu_bo_list_entry),
+ GFP_KERNEL);
+ if (!pd_bo_list_entries) {
+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&resv_list);
+ INIT_LIST_HEAD(&duplicates);
+
+ /* Get all the page directory BOs that need to be reserved */
+ i = 0;
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+ &pd_bo_list_entries[i++]);
+ /* Add the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ list_add_tail(&mem->resv_list.head, &resv_list);
+ mem->resv_list.bo = mem->validate_list.bo;
+ mem->resv_list.shared = mem->validate_list.shared;
+ }
+
+ /* Reserve all BOs and page tables for validation */
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+ WARN(!list_empty(&duplicates), "Duplicates should be empty");
+ if (ret)
+ goto out;
+
+ amdgpu_sync_create(&sync);
+
+ /* Avoid triggering eviction fences when unmapping invalid
+ * userptr BOs (waits for all fences, doesn't use
+ * FENCE_OWNER_VM)
+ */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ struct kfd_bo_va_list *bo_va_entry;
+
+ bo = mem->bo;
+
+ /* Copy pages array and validate the BO if we got user pages */
+ if (mem->user_pages[0]) {
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Validate succeeded, now the BO owns the pages, free
+ * our copy of the pointer array. Put this BO back on
+ * the userptr_valid_list. If we need to revalidate
+ * it, we need to start from scratch.
+ */
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_valid_list);
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+ if (!bo_va_entry->is_mapped)
+ continue;
+
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ atomic_inc(&mem->invalid);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_bo_fence(peer_vm->root.base.bo,
+ &process_info->eviction_fence->base, true);
+ ttm_eu_backoff_reservation(&ticket, &resv_list);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+out:
+ kfree(pd_bo_list_entries);
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ int evicted_bos;
+
+ evicted_bos = atomic_read(&process_info->evicted_bos);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+ goto unlock_out; /* Concurrent eviction, try again */
+
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent eviction and atomic update. If
+ * another eviction happens after successful update, it will
+ * be a first eviction that calls quiesce_mm. The eviction
+ * reference counting inside KFD will handle this case.
+ */
+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+ evicted_bos)
+ goto unlock_out;
+ evicted_bos = 0;
+ if (kgd2kfd->resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+unlock_out:
+ mutex_unlock(&process_info->lock);
+ mmput(mm);
+ put_task_struct(usertask);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos)
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50e6b29..8e66f3702b7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev);
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4cb8e6c..83e344fbb50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
#include <drm/drm.h>
#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
+ enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* We block for all BOs between start and end to be idle and
* unmap them by moving them into the system domain again.
*/
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct interval_tree_node *it;
+
+ /* notification is exclusive, but interval is inclusive */
+ end -= 1;
+
+ amdgpu_mn_read_lock(rmn);
+
+ it = interval_tree_iter_first(&rmn->objects, start, end);
+ while (it) {
+ struct amdgpu_mn_node *node;
+ struct amdgpu_bo *bo;
+
+ node = container_of(it, struct amdgpu_mn_node, it);
+ it = interval_tree_iter_next(it, start, end);
+
+ list_for_each_entry(bo, &node->bos, mn_list) {
+ struct kgd_mem *mem = bo->kfd_bo;
+
+ if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, mm);
+ }
+ }
+}
+
+/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
amdgpu_mn_read_unlock(rmn);
}
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+ [AMDGPU_MN_TYPE_GFX] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
+ [AMDGPU_MN_TYPE_HSA] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
};
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
+ unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
- hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
- if (rmn->mm == mm)
+ hash_for_each_possible(adev->mn_hash, rmn, node, key)
+ if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
- rmn->mn.ops = &amdgpu_mn_ops;
init_rwsem(&rmn->lock);
+ rmn->type = type;
+ rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
if (r)
goto free_rmn;
- hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+ hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ enum amdgpu_mn_type type =
+ bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
- struct amdgpu_mn_node *node = NULL;
+ struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
- rmn = amdgpu_mn_get(adev);
+ rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
+ new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&node->bos, &bos);
}
- if (!node) {
- node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
- if (!node) {
- up_write(&rmn->lock);
- return -ENOMEM;
- }
- }
+ if (!node)
+ node = new_node;
+ else
+ kfree(new_node);
bo->mn = rmn;
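As an aside on the AMDGPU_MN_KEY scheme added above: a minimal editorial sketch (not part of the patch) of why the GFX and HSA notifier contexts of one process land in different hash buckets. The pointer value is invented, and mm_struct is assumed to be at least word aligned, so adding the small type value never aliases another mm:

	struct mm_struct *mm = (struct mm_struct *)0xffff93c08a2b4000UL; /* hypothetical value */
	unsigned long gfx_key = AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX); /* ...4000 */
	unsigned long hsa_key = AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_HSA); /* ...4001, distinct key */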
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3793b8..eb0f432f78fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
+enum amdgpu_mn_type {
+ AMDGPU_MN_TYPE_GFX,
+ AMDGPU_MN_TYPE_HSA,
+};
+
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3ff9cd0..c713d30cba86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -695,7 +695,7 @@ struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
- struct mm_struct *usermm;
+ struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
@@ -706,14 +706,18 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
+ if (!mm) /* Happens during process shutdown */
+ return -ESRCH;
+
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
- down_read(&current->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
@@ -721,9 +725,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
- vma = find_vma(gtt->usermm, gtt->userptr);
+ vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return -EPERM;
}
}
@@ -739,7 +743,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ if (mm == current->mm)
+ r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ else
+ r = get_user_pages_remote(gtt->usertask,
+ mm, userptr, num_pages,
+ flags, p, NULL, NULL);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
@@ -752,12 +761,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
} while (pinned < ttm->num_pages);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
release_pages:
release_pages(pages, pinned);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return r;
}
@@ -978,6 +987,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
ttm_dma_tt_fini(&gtt->ttm);
kfree(gtt);
}
@@ -1079,8 +1091,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return -EINVAL;
gtt->userptr = addr;
- gtt->usermm = current->mm;
gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
@@ -1096,7 +1113,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
if (gtt == NULL)
return NULL;
- return gtt->usermm;
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5b1822..e5962e61beb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4686,6 +4686,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f85fdb6..f22f7a88ce0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -268,6 +268,11 @@
* x=1: tmz_end
*/
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
* 2. CONTROL
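For context, a minimal sketch (editorial, not taken from this patch) of how the new INVALIDATE_TLBS field macros could be combined when emitting a PM4 packet that invalidates the TLB entries of a single PASID; the ring, the pasid variable and the field values are illustrative assumptions:

	/* Hypothetical emission of an INVALIDATE_TLBS packet for one PASID */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |    /* assumed: select by PASID */
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0));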
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d0242240c47..ffd096fffc1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+ kfd_mqd_manager_v9.o \
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
- kfd_kernel_queue_vi.o kfd_packet_manager.o \
- kfd_process_queue_manager.o kfd_device_queue_manager.o \
- kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+ kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
+ kfd_packet_manager.o kfd_process_queue_manager.o \
+ kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
+ kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+ kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 3d5ccb3755d4..49df6c791cfc 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
- unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
+ unsigned int vmid, pasid;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = (ihre->ring_id & 0x0000ff00) >> 8;
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+ /* If there is no valid PASID, it's likely a firmware bug */
pasid = (ihre->ring_id & 0xffff0000) >> 16;
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
- /* Do not process in ISR, just request it to be forwarded to WQ. */
- return (pasid != 0) &&
- (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
- ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
+ ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 48769d12dd7b..37ce6dd65391 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -33,7 +33,8 @@
#define APE1_MTYPE(x) ((x) << 7)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
-#define MTYPE_CACHED 0
+#define MTYPE_CACHED_NV 0
+#define MTYPE_CACHED 1
#define MTYPE_NONCACHED 3
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
new file mode 100644
index 000000000000..f68aef02fc1f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+ 0xbf820001, 0xbf820125,
+ 0xb8f4f802, 0x89748674,
+ 0xb8f5f803, 0x8675ff75,
+ 0x00000400, 0xbf850011,
+ 0xc00a1e37, 0x00000000,
+ 0xbf8c007f, 0x87777978,
+ 0xbf840002, 0xb974f802,
+ 0xbe801d78, 0xb8f5f803,
+ 0x8675ff75, 0x000001ff,
+ 0xbf850002, 0x80708470,
+ 0x82718071, 0x8671ff71,
+ 0x0000ffff, 0xb974f802,
+ 0xbe801f70, 0xb8f5f803,
+ 0x8675ff75, 0x00000100,
+ 0xbf840006, 0xbefa0080,
+ 0xb97a0203, 0x8671ff71,
+ 0x0000ffff, 0x80f08870,
+ 0x82f18071, 0xbefa0080,
+ 0xb97a0283, 0xbef60068,
+ 0xbef70069, 0xb8fa1c07,
+ 0x8e7a9c7a, 0x87717a71,
+ 0xb8fa03c7, 0x8e7a9b7a,
+ 0x87717a71, 0xb8faf807,
+ 0x867aff7a, 0x00007fff,
+ 0xb97af807, 0xbef2007e,
+ 0xbef3007f, 0xbefe0180,
+ 0xbf900004, 0x877a8474,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x867aff7f,
+ 0x08000000, 0x8f7a837a,
+ 0x877b7a7b, 0x867aff7f,
+ 0x70000000, 0x8f7a817a,
+ 0x877b7a7b, 0xbeef007c,
+ 0xbeee0080, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cbc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611d3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xb8f5f803,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611d7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dbc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xb8eff801, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbef30080,
+ 0x8773737a, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8f51605, 0x80758175,
+ 0x8e758475, 0x8e7a8275,
+ 0xbefa00ff, 0x01000000,
+ 0xbef60178, 0x80786e78,
+ 0x82798079, 0xbefc0080,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003c, 0x00000000,
+ 0xc06b013c, 0x00000010,
+ 0xc06b023c, 0x00000020,
+ 0xc06b033c, 0x00000030,
+ 0x8078c078, 0x82798079,
+ 0x807c907c, 0xbf0a757c,
+ 0xbf85ffeb, 0xbef80176,
+ 0xbeee0080, 0xbefe00c1,
+ 0xbeff00c1, 0xbefa00ff,
+ 0x01000000, 0xe0724000,
+ 0x6e1e0000, 0xe0724100,
+ 0x6e1e0100, 0xe0724200,
+ 0x6e1e0200, 0xe0724300,
+ 0x6e1e0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f54306,
+ 0x8675c175, 0xbf84002c,
+ 0xbf8a0000, 0x867aff73,
+ 0x04000000, 0xbf840028,
+ 0x8e758675, 0x8e758275,
+ 0xbefa0075, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0x806eff6e, 0x00000080,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe80007b,
+ 0x867bff7b, 0xff7fffff,
+ 0x877bff7b, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8c007f, 0xe0765000,
+ 0x6e1e0002, 0x32040702,
+ 0xd0c9006a, 0x0000eb02,
+ 0xbf87fff7, 0xbefb0000,
+ 0xbeee00ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f52a05, 0x80758175,
+ 0x8e758275, 0x8e7a8875,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0084, 0xbf0a757c,
+ 0xbf840015, 0xbf11017c,
+ 0x8075ff75, 0x00001000,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x6e1e0000,
+ 0xe0724100, 0x6e1e0100,
+ 0xe0724200, 0x6e1e0200,
+ 0xe0724300, 0x6e1e0300,
+ 0x807c847c, 0x806eff6e,
+ 0x00000400, 0xbf0a757c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200ca, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x8676ff7f,
+ 0x08000000, 0x8f768376,
+ 0x877b767b, 0x8676ff7f,
+ 0x70000000, 0x8f768176,
+ 0x877b767b, 0x8676ff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f34306, 0x8673c173,
+ 0xbf840019, 0x8e738673,
+ 0x8e738273, 0xbefa0073,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x8072ff72,
+ 0x00000080, 0xbefa00ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x721e0000,
+ 0xe0510100, 0x721e0000,
+ 0x807cff7c, 0x00000200,
+ 0x8072ff72, 0x00000200,
+ 0xbf0a737c, 0xbf85fff6,
+ 0xbef20080, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f32a05,
+ 0x80738173, 0x8e738273,
+ 0x8e7a8873, 0xbefa00ff,
+ 0x01000000, 0xbef60072,
+ 0x8072ff72, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0x8073ff73, 0x00008000,
+ 0xe0524000, 0x721e0000,
+ 0xe0524100, 0x721e0100,
+ 0xe0524200, 0x721e0200,
+ 0xe0524300, 0x721e0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8072ff72, 0x00000400,
+ 0xbf0a737c, 0xbf85ffee,
+ 0xbf9c0000, 0xe0524000,
+ 0x761e0000, 0xe0524100,
+ 0x761e0100, 0xe0524200,
+ 0x761e0200, 0xe0524300,
+ 0x761e0300, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0x80f2c072, 0xb8f31605,
+ 0x80738173, 0x8e738473,
+ 0x8e7a8273, 0xbefa00ff,
+ 0x01000000, 0xbefc0073,
+ 0xc031003c, 0x00000072,
+ 0x80f2c072, 0xbf8c007f,
+ 0x80fc907c, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff1, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xc0211cfc,
+ 0x00000072, 0x80728472,
+ 0xc0211c3c, 0x00000072,
+ 0x80728472, 0xc0211c7c,
+ 0x00000072, 0x80728472,
+ 0xc0211bbc, 0x00000072,
+ 0x80728472, 0xc0211bfc,
+ 0x00000072, 0x80728472,
+ 0xc0211d3c, 0x00000072,
+ 0x80728472, 0xc0211d7c,
+ 0x00000072, 0x80728472,
+ 0xc0211a3c, 0x00000072,
+ 0x80728472, 0xc0211a7c,
+ 0x00000072, 0x80728472,
+ 0xc0211dfc, 0x00000072,
+ 0x80728472, 0xc0211b3c,
+ 0x00000072, 0x80728472,
+ 0xc0211b7c, 0x00000072,
+ 0x80728472, 0xbf8c007f,
+ 0xbefc0073, 0xbefe006e,
+ 0xbeff006f, 0x867375ff,
+ 0x000003ff, 0xb9734803,
+ 0x867375ff, 0xfffff800,
+ 0x8f738b73, 0xb973a2c3,
+ 0xb977f801, 0x8673ff71,
+ 0xf0000000, 0x8f739c73,
+ 0x8e739073, 0xbef60080,
+ 0x87767376, 0x8673ff71,
+ 0x08000000, 0x8f739b73,
+ 0x8e738f73, 0x87767376,
+ 0x8673ff74, 0x00800000,
+ 0x8f739773, 0xb976f807,
+ 0x8671ff71, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb974f802, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
+};
+
+
+static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbf820001, 0xbf82015a,
+ 0xb8f8f802, 0x89788678,
+ 0xb8f1f803, 0x866eff71,
+ 0x00000400, 0xbf850034,
+ 0x866eff71, 0x00000800,
+ 0xbf850003, 0x866eff71,
+ 0x00000100, 0xbf840008,
+ 0x866eff78, 0x00002000,
+ 0xbf840001, 0xbf810000,
+ 0x8778ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xb8eef807, 0x866fff6e,
+ 0x001f8000, 0x8e6f8b6f,
+ 0x8977ff77, 0xfc000000,
+ 0x87776f77, 0x896eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0xb8f0f812, 0xb8f1f813,
+ 0x8ef08870, 0xc0071bb8,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071c38, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0xb8f1f803, 0x8671ff71,
+ 0x000001ff, 0xbf850002,
+ 0x806c846c, 0x826d806d,
+ 0x866dff6d, 0x0000ffff,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb978f802, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbef00080, 0xb9700283,
+ 0xb8f02407, 0x8e709c70,
+ 0x876d706d, 0xb8f003c7,
+ 0x8e709b70, 0x876d706d,
+ 0xb8f0f807, 0x8670ff70,
+ 0x00007fff, 0xb970f807,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbf900004,
+ 0x87708478, 0xb970f802,
+ 0xbf8e0002, 0xbf88fffe,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8f11605,
+ 0x80718171, 0x8e718671,
+ 0x80707170, 0x80707e70,
+ 0x8271807f, 0x8671ff71,
+ 0x0000ffff, 0xc0471cb8,
+ 0x00000040, 0xbf8cc07f,
+ 0xc04b1d38, 0x00000048,
+ 0xbf8cc07f, 0xc0431e78,
+ 0x00000058, 0xbf8cc07f,
+ 0xc0471eb8, 0x0000005c,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x8670ff7f,
+ 0x08000000, 0x8f708370,
+ 0x87777077, 0x8670ff7f,
+ 0x70000000, 0x8f708170,
+ 0x87777077, 0xbefb007c,
+ 0xbefa0080, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f01605, 0x80708170,
+ 0x8e708670, 0x807a707a,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc007a, 0xc0611efa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8f1f803,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8fbf801,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0x8670ff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f70, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f11605, 0x80718171,
+ 0x8e718471, 0x8e768271,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747a74,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a717c, 0xbf85ffe7,
+ 0xbef40172, 0xbefa0080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0xe0724000, 0x7a1d0000,
+ 0xe0724100, 0x7a1d0100,
+ 0xe0724200, 0x7a1d0200,
+ 0xe0724300, 0x7a1d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f14306, 0x8671c171,
+ 0xbf84002c, 0xbf8a0000,
+ 0x8670ff6f, 0x04000000,
+ 0xbf840028, 0x8e718671,
+ 0x8e718271, 0xbef60071,
+ 0xb8fa2a05, 0x807a817a,
+ 0x8e7a8a7a, 0xb8f01605,
+ 0x80708170, 0x8e708670,
+ 0x807a707a, 0x807aff7a,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x7a1d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000e302, 0xbf87fff7,
+ 0xbef70000, 0xbefa00ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f12a05,
+ 0x80718171, 0x8e718271,
+ 0x8e768871, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a717c, 0xbf840015,
+ 0xbf11017c, 0x8071ff71,
+ 0x00001000, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0xe0724000,
+ 0x7a1d0000, 0xe0724100,
+ 0x7a1d0100, 0xe0724200,
+ 0x7a1d0200, 0xe0724300,
+ 0x7a1d0300, 0x807c847c,
+ 0x807aff7a, 0x00000400,
+ 0xbf0a717c, 0xbf85ffef,
+ 0xbf9c0000, 0xbf8200d9,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x08000000,
+ 0x8f6e836e, 0x87776e77,
+ 0x866eff7f, 0x70000000,
+ 0x8f6e816e, 0x87776e77,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001e, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf840019,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbef80080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x8e76886f,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0x806fff6f,
+ 0x00008000, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe007a,
+ 0xbeff007b, 0x866f71ff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f71ff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc0071cb7, 0x00000040,
+ 0xc00b1d37, 0x00000048,
+ 0xc0031e77, 0x00000058,
+ 0xc0071eb7, 0x0000005c,
+ 0xbf8cc07f, 0x866fff6d,
+ 0xf0000000, 0x8f6f9c6f,
+ 0x8e6f906f, 0xbeee0080,
+ 0x876e6f6e, 0x866fff6d,
+ 0x08000000, 0x8f6f9b6f,
+ 0x8e6f8f6f, 0x876e6f6e,
+ 0x866fff70, 0x00800000,
+ 0x8f6f976f, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb970f802, 0xbf8a0000,
+ 0x95806f6c, 0xbf810000,
+};
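To show how the two embedded binaries above might be consumed, here is a hedged sketch (editorial, not part of this patch) of a device-init helper choosing the blob by GPU generation; the kfd field names and the CHIP_VEGA10 cut-off are assumptions for illustration:

	/* Illustrative only: choose the CWSR trap handler blob per ASIC family */
	if (kfd->device_info->asic_family < CHIP_VEGA10) {
		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
		kfd->cwsr_isa = cwsr_trap_gfx8_hex;              /* assumed field name */
		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
	} else {
		BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
		kfd->cwsr_isa = cwsr_trap_gfx9_hex;
		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
	}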
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383dcb8b..a2a04bb64096 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -20,9 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if 0
-HW (VI) source code for CWSR trap handler
-#Version 18 + multiple trap handler
+/* To compile this assembly code:
+ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
+ */
+
+/* HW (VI) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
// this performance-optimal version was originally from Seven Xu at SRDC
@@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
//tba_lo and tba_hi need to be saved/restored
-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
@@ -319,6 +323,10 @@ end
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
end
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
L_SLEEP:
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
@@ -1007,8 +1015,6 @@ end
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
-
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
@@ -1044,6 +1050,7 @@ end
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -1127,258 +1134,3 @@ end
function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes
end
-
-
-#endif
-
-static const uint32_t cwsr_trap_gfx8_hex[] = {
- 0xbf820001, 0xbf820123,
- 0xb8f4f802, 0x89748674,
- 0xb8f5f803, 0x8675ff75,
- 0x00000400, 0xbf850011,
- 0xc00a1e37, 0x00000000,
- 0xbf8c007f, 0x87777978,
- 0xbf840002, 0xb974f802,
- 0xbe801d78, 0xb8f5f803,
- 0x8675ff75, 0x000001ff,
- 0xbf850002, 0x80708470,
- 0x82718071, 0x8671ff71,
- 0x0000ffff, 0xb974f802,
- 0xbe801f70, 0xb8f5f803,
- 0x8675ff75, 0x00000100,
- 0xbf840006, 0xbefa0080,
- 0xb97a0203, 0x8671ff71,
- 0x0000ffff, 0x80f08870,
- 0x82f18071, 0xbefa0080,
- 0xb97a0283, 0xbef60068,
- 0xbef70069, 0xb8fa1c07,
- 0x8e7a9c7a, 0x87717a71,
- 0xb8fa03c7, 0x8e7a9b7a,
- 0x87717a71, 0xb8faf807,
- 0x867aff7a, 0x00007fff,
- 0xb97af807, 0xbef2007e,
- 0xbef3007f, 0xbefe0180,
- 0xbf900004, 0xbf8e0002,
- 0xbf88fffe, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x877b7a7b, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x877b7a7b, 0xbeef007c,
- 0xbeee0080, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cbc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611d3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xb8f5f803,
- 0xbefe007c, 0xbefc006e,
- 0xc0611d7c, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dbc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dfc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xb8eff801, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbef30080,
- 0x8773737a, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8f51605, 0x80758175,
- 0x8e758475, 0x8e7a8275,
- 0xbefa00ff, 0x01000000,
- 0xbef60178, 0x80786e78,
- 0x82798079, 0xbefc0080,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003c, 0x00000000,
- 0xc06b013c, 0x00000010,
- 0xc06b023c, 0x00000020,
- 0xc06b033c, 0x00000030,
- 0x8078c078, 0x82798079,
- 0x807c907c, 0xbf0a757c,
- 0xbf85ffeb, 0xbef80176,
- 0xbeee0080, 0xbefe00c1,
- 0xbeff00c1, 0xbefa00ff,
- 0x01000000, 0xe0724000,
- 0x6e1e0000, 0xe0724100,
- 0x6e1e0100, 0xe0724200,
- 0x6e1e0200, 0xe0724300,
- 0x6e1e0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8f54306,
- 0x8675c175, 0xbf84002c,
- 0xbf8a0000, 0x867aff73,
- 0x04000000, 0xbf840028,
- 0x8e758675, 0x8e758275,
- 0xbefa0075, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0x806eff6e, 0x00000080,
- 0xbefa00ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe80007b,
- 0x867bff7b, 0xff7fffff,
- 0x877bff7b, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8c007f, 0xe0765000,
- 0x6e1e0002, 0x32040702,
- 0xd0c9006a, 0x0000eb02,
- 0xbf87fff7, 0xbefb0000,
- 0xbeee00ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f52a05, 0x80758175,
- 0x8e758275, 0x8e7a8875,
- 0xbefa00ff, 0x01000000,
- 0xbefc0084, 0xbf0a757c,
- 0xbf840015, 0xbf11017c,
- 0x8075ff75, 0x00001000,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0xe0724000, 0x6e1e0000,
- 0xe0724100, 0x6e1e0100,
- 0xe0724200, 0x6e1e0200,
- 0xe0724300, 0x6e1e0300,
- 0x807c847c, 0x806eff6e,
- 0x00000400, 0xbf0a757c,
- 0xbf85ffef, 0xbf9c0000,
- 0xbf8200ca, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x8676ff7f,
- 0x08000000, 0x8f768376,
- 0x877b767b, 0x8676ff7f,
- 0x70000000, 0x8f768176,
- 0x877b767b, 0x8676ff7f,
- 0x04000000, 0xbf84001e,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f34306, 0x8673c173,
- 0xbf840019, 0x8e738673,
- 0x8e738273, 0xbefa0073,
- 0xb8f22a05, 0x80728172,
- 0x8e728a72, 0xb8f61605,
- 0x80768176, 0x8e768676,
- 0x80727672, 0x8072ff72,
- 0x00000080, 0xbefa00ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x721e0000,
- 0xe0510100, 0x721e0000,
- 0x807cff7c, 0x00000200,
- 0x8072ff72, 0x00000200,
- 0xbf0a737c, 0xbf85fff6,
- 0xbef20080, 0xbefe00c1,
- 0xbeff00c1, 0xb8f32a05,
- 0x80738173, 0x8e738273,
- 0x8e7a8873, 0xbefa00ff,
- 0x01000000, 0xbef60072,
- 0x8072ff72, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0x8073ff73, 0x00008000,
- 0xe0524000, 0x721e0000,
- 0xe0524100, 0x721e0100,
- 0xe0524200, 0x721e0200,
- 0xe0524300, 0x721e0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8072ff72, 0x00000400,
- 0xbf0a737c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x761e0000, 0xe0524100,
- 0x761e0100, 0xe0524200,
- 0x761e0200, 0xe0524300,
- 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0x80f2c072, 0xb8f31605,
- 0x80738173, 0x8e738473,
- 0x8e7a8273, 0xbefa00ff,
- 0x01000000, 0xbefc0073,
- 0xc031003c, 0x00000072,
- 0x80f2c072, 0xbf8c007f,
- 0x80fc907c, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff1, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xc0211cfc,
- 0x00000072, 0x80728472,
- 0xc0211c3c, 0x00000072,
- 0x80728472, 0xc0211c7c,
- 0x00000072, 0x80728472,
- 0xc0211bbc, 0x00000072,
- 0x80728472, 0xc0211bfc,
- 0x00000072, 0x80728472,
- 0xc0211d3c, 0x00000072,
- 0x80728472, 0xc0211d7c,
- 0x00000072, 0x80728472,
- 0xc0211a3c, 0x00000072,
- 0x80728472, 0xc0211a7c,
- 0x00000072, 0x80728472,
- 0xc0211dfc, 0x00000072,
- 0x80728472, 0xc0211b3c,
- 0x00000072, 0x80728472,
- 0xc0211b7c, 0x00000072,
- 0x80728472, 0xbf8c007f,
- 0x8671ff71, 0x0000ffff,
- 0xbefc0073, 0xbefe006e,
- 0xbeff006f, 0x867375ff,
- 0x000003ff, 0xb9734803,
- 0x867375ff, 0xfffff800,
- 0x8f738b73, 0xb973a2c3,
- 0xb977f801, 0x8673ff71,
- 0xf0000000, 0x8f739c73,
- 0x8e739073, 0xbef60080,
- 0x87767376, 0x8673ff71,
- 0x08000000, 0x8f739b73,
- 0x8e738f73, 0x87767376,
- 0x8673ff74, 0x00800000,
- 0x8f739773, 0xb976f807,
- 0x86fe7e7e, 0x86ea6a6a,
- 0xb974f802, 0xbf8a0000,
- 0x95807370, 0xbf810000,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
new file mode 100644
index 000000000000..998be96be736
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex
+ */
+
+/* HW (GFX9) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revision #18 --...
+/* Rev History
+** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(merged, skipped-already fixed by PV)
+** #4. SR Memory Layout:
+** 1. VGPR-SGPR-HWREG-{LDS}
+** 2. tba_hi.bits.26 - reconfigured as the first-wave-in-tg bit, to defer Save LDS for a threadgroup... performance concern..
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+** 2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
+** 2. optimize s_buffer save by bursting 16 sgprs...
+** #10. Update 1. Optimize restore sgpr by bursting 16 sgprs.
+** #11. Update 1. Add 2 more timestamp for debug version
+** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
+** #13. Integ 1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combine. Big perf improvement.
+** #16. Update 1. PERF - UNROLL LDS_DMA got a 2500-cycle saving in IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
+** 2. PERF - Save LDS before save VGPR to cover LDS save long latency...
+** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
+** 2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+
+// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s = s[34:35] // save start
+var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader sends the SAVEWAVE msg to SPI
+var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI writes the sr address to SQ
+var s_g8sr_ts_save_d = s[40:41] // save end
+var s_g8sr_ts_restore_s = s[42:43] // restore start
+var s_g8sr_ts_restore_d = s[44:45] // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/* control on how to run the shader */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code is not ready or there is no compute save & restore in the EMU run)
+var EMU_RUN_HACK = 0
+var EMU_RUN_HACK_RESTORE_NORMAL = 0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS = 1
+var WG_BASE_ADDR_LO = 0x9000a000
+var WG_BASE_ADDR_HI = 0x0
+var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL = 0x0
+var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
+var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code is not ready or there is no compute save & restore in the RTL run)
+var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0; the DFMT field is overloaded to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+/**************************************************************************/
+/* variables */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
+
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+
+/* Save */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT = 27
+var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_tmp = ttmp4
+var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo = ttmp6
+var s_save_xnack_mask_hi = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_status = ttmp12
+var s_save_mem_offset = ttmp14
+var s_save_alloc_size = s_save_trapsts //conflict
+var s_save_m0 = ttmp15
+var s_save_ttmps_lo = s_save_tmp //no conflict
+var s_save_ttmps_hi = s_save_trapsts //no conflict
+
+/* Restore */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
+var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp //no conflict
+
+var s_restore_m0 = s_restore_alloc_size //no conflict
+
+var s_restore_mode = ttmp7
+
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp14
+var s_restore_exec_hi = ttmp15
+var s_restore_status = ttmp4
+var s_restore_trapsts = ttmp5
+var s_restore_xnack_mask_lo = xnack_mask_lo
+var s_restore_xnack_mask_hi = xnack_mask_hi
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_ttmps_lo = s_restore_tmp //no conflict
+var s_restore_ttmps_hi = s_restore_alloc_size //no conflict
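+
+// Note on the assignments above: until the wave's own SGPRs and VGPRs have
+// been written out, the handler works entirely out of the trap temporaries
+// (ttmp0-15) plus exec and xnack_mask; SPI supplies the save/restore base
+// address through EXEC, hence the s_save/s_restore_spi_init_lo/hi aliases.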
+
+/**************************************************************************/
+/* trap handler entry points */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+ asic(GFX9)
+ type(CS)
+
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
+ //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
+ s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+ s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
+ //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
+ s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
+ else
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+ end
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE //restore
+
+L_SKIP_RESTORE:
+
+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //clear SPI_PRIO in the saved STATUS
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_cbranch_scc1 L_SAVE //this is the operation for save
+
+ // ********* Handle non-CWSR traps *******************
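+ // Summary of the flow below: illegal-instruction and memory-violation traps
+ // halt the wave first (an already-halted wave is terminated, and the PC is
+ // rewound so that a later context save can still succeed). All paths then
+ // try to fetch a second-level TBA/TMA from the first-level TMA and jump to
+ // it if one is installed; otherwise the PC is advanced past an S_TRAP (but
+ // not past an exception) and the wave resumes via s_rfe_b64.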
+if (!EMU_RUN_HACK)
+ // Illegal instruction is a non-maskable exception which blocks context save.
+ // Halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_cbranch_scc1 L_HALT_WAVE
+
+ // If TRAPSTS.MEM_VIOL is asserted then we cannot fetch from the TMA.
+ // Instead, halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+ s_cbranch_scc0 L_FETCH_2ND_TRAP
+
+L_HALT_WAVE:
+ // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
+ // We cannot prevent further faults so just terminate the wavefront.
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_ALREADY_HALTED
+ s_endpgm
+L_NOT_ALREADY_HALTED:
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring. The debugger compensates for this.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+L_FETCH_2ND_TRAP:
+ // Preserve and clear scalar XNACK state before issuing scalar reads.
+ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+ s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp3
+
+ s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATUS
+ s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+ s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+ s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+ s_waitcnt lgkmcnt(0)
+ s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+ s_waitcnt lgkmcnt(0)
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+ s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
+ s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
+ s_addc_u32 ttmp1, ttmp1, 0
+L_EXCP_CASE:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_IB_STS.
+ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+
+ s_rfe_b64 [ttmp0, ttmp1]
+end
+ // ********* End handling of non-CWSR traps *******************
+
+/**************************************************************************/
+/* save routine */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+end
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+
+ s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_sq_save_msg
+ s_waitcnt lgkmcnt(0)
+end
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+ end
+
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+ L_SLEEP:
+ s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD and will cause an SQ hang, since the 7th/8th waves cannot get arbitration to execute instructions while the other waves are stuck in the sleep loop waiting for wrexec!=0
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_cbranch_execz L_SLEEP
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_spi_wrexec
+ s_waitcnt lgkmcnt(0)
+end
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_save_tmp, v9, 0
+ s_lshr_b32 s_save_tmp, s_save_tmp, 6
+ s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+
+ // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_save_ttmps_lo)
+ get_sgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+ s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+ s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+ ack_sqc_store_workaround()
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+ ack_sqc_store_workaround()
+
+ /* setup Resource Constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
+
+ //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
+ s_mov_b32 s_save_m0, m0 //save M0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
+
+
+
+
+ /* save HW registers */
+ //////////////////////////////
+
+ L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
+
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ end
+
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+ //s_save_trapsts conflicts with s_save_alloc_size
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS
+
+ write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO
+ write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI
+
+ //using s_save_tmp here would introduce a conflict between s_save_tmp and s_save_buf_rsrc2
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract first wave bit
+ s_mov_b32 s_save_exec_hi, 0x0
+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+
+ /* save SGPRs */
+ // Save SGPRs before the LDS save, so that s0 to s4 can be used during the LDS save...
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //sgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // backup s_save_buf_rsrc0,1 to s_save_xnack_mask_lo/hi, since the write_16sgpr_to_mem function will change rsrc0
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+ L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete?
+ // restore s_save_buf_rsrc0,1
+ //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+ s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+ /* save the first 4 VGPRs, so the LDS save below can use them */
+ // each wave allocates at least 4 VGPRs...
+ /////////////////////////////////////////////////////////////////////////////////////
+
+ s_mov_b32 s_save_mem_offset, 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_mov_b32 xnack_mask_lo, 0x0
+ s_mov_b32 xnack_mask_hi, 0x0
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+end
+
+
+
+ /* save LDS */
+ //////////////////////////////
+
+ L_SAVE_LDS:
+
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_LDS_DONE
+
+ s_barrier //LDS is used? wait for other waves in the same TG
+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+ s_mov_b32 s3, 256*2
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ L_SAVE_LDS_LOOP:
+ //TODO: it looks like the 2 buffer_store/load clauses for s/r will hurt performance???
+ if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+ end
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROLL, has icache miss
+ // store from highest LDS address to lowest
+ s_mov_b32 s3, 256*2
+ s_sub_u32 m0, s_save_alloc_size, s3
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+ s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128-DW chunks...
+ s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from highest addr to lowest
+ s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block costs 6*4 bytes of instructions
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //plus the s_add/s_addc/s_setpc sequence below
+ s_nop 0
+ s_nop 0
+ s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
+ s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
+ s_add_u32 s0, s0,s_save_alloc_size
+ s_addc_u32 s1, s1, 0
+ s_setpc_b64 s[0:1]
+
+
+ for var i =0; i< 128; i++
+ // be careful to make this a 64-byte-aligned address, which could improve performance...
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+
+ if i!=127
+ s_sub_u32 m0, m0, s3 // use an sgpr to shrink a 2DW inst to a 1DW inst to improve performance, i.e. pack more LDS_DMA instructions into one cacheline
+ s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
+ end
+ end
+
+else // BUFFER_STORE
+ v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+ v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
+ v_mul_i32_i24 v2, v3, 8 // tid*8
+ v_mov_b32 v3, 256*2
+ s_mov_b32 m0, 0x10000
+ s_mov_b32 s0, s_save_buf_rsrc3
+ s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+ ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
+ s_waitcnt lgkmcnt(0)
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+// s_waitcnt vmcnt(0)
+// v_add_u32 v2, vcc[0:1], v2, v3
+ v_add_u32 v2, v2, v3
+ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+ s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+ // restore rsrc3
+ s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+ /* save VGPRs - save the rest of the VGPRs */
+ //////////////////////////////////////////////////////////////////////////////////////
+ L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, 4 // skip first 4 VGPRs
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
+
+ s_set_gpr_idx_on m0, 0x1 // This will change M0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0
+L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 // v0 = v[0+m0]
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 m0, m0, 4
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value = 4 (v0-v3 already saved)
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+
+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
+
+ L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 //v0 = v[0+m0]
+ v_mov_b32 v1, v1 //v1 = v[1+m0]
+ v_mov_b32 v2, v2 //v2 = v[2+m0]
+ v_mov_b32 v3, v3 //v3 = v[3+m0]
+
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ end
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+end
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+ /* S_PGM_END_SAVED */ //FIXME graphics ONLY
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ s_rfe_b64 s_save_pc_lo //Return to the main shader program
+ else
+ end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_d
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // Need reset rsrc2??
+ s_mov_b32 m0, s_save_mem_offset
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+ s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+ /* Setup Resource Constants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_restore_tmp, v9, 0
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+ s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+ s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+ else
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // tma_lo/hi are sgpr 110, 111, which are not used in the 112-SGPR-allocated case...
+ s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+ s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, since exec will be restored at the end..
+end
+
+
+
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+ /* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+ //////////////////////////////
+ L_RESTORE_LDS:
+
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+ L_RESTORE_LDS_LOOP:
+ if (SAVE_LDS)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
+ end
+ s_add_u32 m0, m0, 256*2 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
+
+
+ /* restore VGPRs */
+ //////////////////////////////
+ L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+if G8SR_VGPR_SR_IN_DWX4
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, s_restore_alloc_size
+ s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
+
+L_RESTORE_VGPR_LOOP:
+ buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ s_waitcnt vmcnt(0)
+ s_sub_u32 m0, m0, 4
+ v_mov_b32 v0, v0 // v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_cmp_eq_u32 m0, 0x8000
+ s_cbranch_scc0 L_RESTORE_VGPR_LOOP
+ s_set_gpr_idx_off
+
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes
+
+else
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore starts with v4, v0-v3 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
+
+ L_RESTORE_VGPR_LOOP:
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+ end
+ s_waitcnt vmcnt(0) //ensure data ready
+ v_mov_b32 v0, v0 //v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
+ s_set_gpr_idx_off
+ /* VGPR restore on v0-v3 */
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ end
+
+end
+
+ /* restore SGPRs */
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPRs from S[n] to S[0], in groups of 16 SGPRs
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //sgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore is complete?
+
+ /* restore HW registers */
+ //////////////////////////////
+ L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+ s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+ s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS
+ read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS
+ read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
+ read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
+
+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+
+ //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+ s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
+
+ // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo)
+ get_sgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
+ s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+ s_waitcnt lgkmcnt(0)
+
+ //reuse s_restore_m0 as a temp register
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+ s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+
+ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_d
+ s_waitcnt lgkmcnt(0)
+end
+
+// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+ s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/* the END */
+/**************************************************************************/
+L_END_PGM:
+ s_endpgm
+
+end
+
+
+/**************************************************************************/
+/* the helper functions */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+ s_buffer_store_dword s, s_rsrc, m0 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+end
+
+
+// HWREGs are saved before SGPRs, so all HWREGs can be used.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+ // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+ s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size
+ s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in bytes = lds_size * 64 DW * 4 bytes // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vgpr_size
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR size in bytes = (vgpr_size + 1) * 4 VGPRs * 64 threads * 4 bytes (non-zero value) //FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+ s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //sgpr_size
+ s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1
+ s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //SGPR size in bytes = (sgpr_size + 1) * 16 SGPRs * 4 bytes (non-zero value)
+end
+
+function get_hwreg_size_bytes
+ return 128 //HWREG size 128 bytes
+end
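+
+// Accounting note (derived from the save routine above): the HWREG block
+// holds 10 saved dwords (M0, PC_LO/HI, EXEC_LO/HI, STATUS, TRAPSTS,
+// XNACK_MASK_LO/HI, MODE = 40 bytes) plus the trap temporaries stored at
+// offsets 0x40-0x63, so everything fits within the 128 bytes reserved here.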
+
+function ack_sqc_store_workaround
+ if ACK_SQC_STORE
+ s_waitcnt lgkmcnt(0)
+ end
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 59808a39ecf4..f64c5551cdba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Size: 0x%llX, %u\n",
q_properties->queue_size, args->ring_size);
- pr_debug("Queue r/w Pointers: %p, %p\n",
+ pr_debug("Queue r/w Pointers: %px, %px\n",
q_properties->read_ptr,
q_properties->write_ptr);
@@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
- args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
+ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
+ args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
+ if (KFD_IS_SOC15(dev->device_info->asic_family))
+ /* On SOC15 ASICs, doorbell allocation must be
+ * per-device, and independent from the per-process
+ * queue_id. Return the doorbell offset within the
+ * doorbell aperture to user mode.
+ */
+ args->doorbell_offset |= q_properties.doorbell_off;
mutex_unlock(&p->mutex);
@@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1645,23 +1653,33 @@ err_i1:
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
+ struct kfd_dev *dev = NULL;
+ unsigned long vm_pgoff;
+ unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
- if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
- KFD_MMAP_DOORBELL_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
- return kfd_doorbell_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
- KFD_MMAP_EVENTS_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
+ gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+ if (gpu_id)
+ dev = kfd_device_by_id(gpu_id);
+
+ switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+ case KFD_MMAP_TYPE_DOORBELL:
+ if (!dev)
+ return -ENODEV;
+ return kfd_doorbell_mmap(dev, process, vma);
+
+ case KFD_MMAP_TYPE_EVENTS:
return kfd_event_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
- KFD_MMAP_RESERVED_MEM_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
- return kfd_reserved_mem_mmap(process, vma);
+
+ case KFD_MMAP_TYPE_RESERVED_MEM:
+ if (!dev)
+ return -ENODEV;
+ return kfd_reserved_mem_mmap(dev, process, vma);
}
return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 4f126ef6139b..296b3f230280 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
+/* TODO - check & update Vega10 cache details */
+#define vega10_cache_info carrizo_cache_info
+#define raven_cache_info carrizo_cache_info
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris11_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
+ case CHIP_VEGA10:
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case CHIP_RAVEN:
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3346699960dd..7ee6cec2c060 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,16 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
-#include <linux/amd-iommu.h>
-#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
-#include "cwsr_trap_handler_gfx8.asm"
+#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
@@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = {
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_pci_atomics = true,
};
+static const struct kfd_device_info vega10_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info vega10_vf_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
struct kfd_deviceid {
unsigned short did;
@@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
+ { 0x6860, &vega10_device_info }, /* Vega10 */
+ { 0x6861, &vega10_device_info }, /* Vega10 */
+ { 0x6862, &vega10_device_info }, /* Vega10 */
+ { 0x6863, &vega10_device_info }, /* Vega10 */
+ { 0x6864, &vega10_device_info }, /* Vega10 */
+ { 0x6867, &vega10_device_info }, /* Vega10 */
+ { 0x6868, &vega10_device_info }, /* Vega10 */
+ { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
+ { 0x687F, &vega10_device_info }, /* Vega10 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
-
+ int ret;
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
@@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
- if (device_info->needs_pci_atomics) {
- /* Allow BIF to recode atomics to PCIe 3.0
- * AtomicOps. 32 and 64-bit requests are possible and
- * must be supported.
- */
- if (pci_enable_atomic_ops_to_root(pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
- dev_info(kfd_device,
- "skipped device %x:%x, PCI rejects atomics",
- pdev->vendor, pdev->device);
- return NULL;
- }
+ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+ * 32 and 64-bit requests are possible and must be
+ * supported.
+ */
+ ret = pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (device_info->needs_pci_atomics && ret < 0) {
+ dev_info(kfd_device,
+ "skipped device %x:%x, PCI rejects atomics\n",
+ pdev->vendor, pdev->device);
+ return NULL;
}
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
@@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ }
- kfd->cwsr_isa = cwsr_trap_gfx8_hex;
- kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
kfd->cwsr_enabled = true;
}
}
@@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
+int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running, so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_evict_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running, so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_restore_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
* prepare for safe eviction of KFD BOs that belong to the specified
* process.
@@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
return -ENOMEM;
- *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if ((*mem_obj) == NULL)
return -ENOMEM;
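The kgd2kfd_quiesce_mm()/kgd2kfd_resume_mm() pair added above is meant to bracket operations that invalidate a process's GPU mappings. A minimal, hypothetical caller sketch (the helper name and the work done in the middle are illustrative only, not the actual amdgpu code):

static int example_remap_process_memory(struct mm_struct *mm)
{
	int r;

	/* Stop all queues of the KFD process that owns this mm */
	r = kgd2kfd_quiesce_mm(mm);
	if (r)
		return r;

	/* ... invalidate or rebuild GPU page-table mappings here ... */

	/* Restart the queues once the mappings are consistent again */
	return kgd2kfd_resume_mm(mm);
}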
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d55d29d31da4..668ad07ebe1f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
+static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+{
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ /* On pre-SOC15 chips we need to use the queue ID to
+ * preserve the user mode ABI.
+ */
+ q->doorbell_id = q->properties.queue_id;
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ /* For SDMA queues on SOC15, use static doorbell
+ * assignments based on the engine and queue.
+ */
+ q->doorbell_id = dev->shared_resources.sdma_doorbell
+ [q->properties.sdma_engine_id]
+ [q->properties.sdma_queue_id];
+ } else {
+ /* For CP queues on SOC15 reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
+ }
+
+ q->properties.doorbell_off =
+ kfd_doorbell_id_to_offset(dev, q->process,
+ q->doorbell_id);
+
+ return 0;
+}
+
+static void deallocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ unsigned int old;
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ return;
+
+ old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+ WARN_ON(!old);
+}
+
static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm,
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
struct qcm_process_device *qpd)
{
- uint32_t len;
+ const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
+ int ret;
if (!qpd->ib_kaddr)
return -ENOMEM;
- len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ if (ret)
+ return ret;
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
- qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
+ pmf->release_mem_size / sizeof(uint32_t));
}
static void deallocate_vmid(struct device_queue_manager *dqm,
@@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
if (retval)
return retval;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_hqd;
+
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_hqd;
+ goto out_deallocate_doorbell;
pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
q->pipe, q->queue);
@@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_hqd:
deallocate_hqd(dqm, q);
@@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
}
dqm->total_queue_count--;
+ deallocate_doorbell(qpd, q);
+
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
@@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
pr_debug("SDMA id is: %d\n", q->sdma_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
if (retval)
@@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_engine_id =
q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
}
+
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
}
/*
* Eviction state logic: we only mark active queues as evicted
@@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
@@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_unlock(&dqm->lock);
return retval;
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
goto failed;
}
+ deallocate_doorbell(qpd, q);
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- bool retval;
+ bool retval = true;
+
+ if (!dqm->asic_ops.set_cache_memory_policy)
+ return retval;
mutex_lock(&dqm->lock);
@@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS11:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
@@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0;
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+ KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
+ if (!r) {
+ seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
+ KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
+ KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
+ KFD_CIK_HIQ_QUEUE);
+ seq_reg_dump(m, dump, n_regs);
+
+ kfree(dump);
+ }
+
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
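The doorbell ID management added above (allocate_doorbell()/deallocate_doorbell()) is a plain bitmap reserve/release scheme for SOC15 CP queues. A simplified, self-contained sketch of the same pattern; the names and the 1024 bound are stand-ins for the real kfd structures and KFD_MAX_NUM_OF_QUEUES_PER_PROCESS:

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/errno.h>

#define EXAMPLE_MAX_DOORBELLS 1024

static DECLARE_BITMAP(example_doorbell_bitmap, EXAMPLE_MAX_DOORBELLS);

/* Reserve the lowest free doorbell ID, or -EBUSY if none are left */
static int example_doorbell_alloc(void)
{
	unsigned int id = find_first_zero_bit(example_doorbell_bitmap,
					      EXAMPLE_MAX_DOORBELLS);

	if (id >= EXAMPLE_MAX_DOORBELLS)
		return -EBUSY;
	set_bit(id, example_doorbell_bitmap);
	return id;
}

/* Release a previously reserved ID; warn on double free */
static void example_doorbell_free(unsigned int id)
{
	WARN_ON(!test_and_clear_bit(id, example_doorbell_bitmap));
}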
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 412beff3281d..59a6b1956932 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -200,6 +200,8 @@ void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
new file mode 100644
index 000000000000..79e5bcf6367c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "vega10_enum.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->update_qpd = update_qpd_v9;
+ asic_ops->init_sdma_vm = init_sdma_vm_v9;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd;
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+ if (vega10_noretry &&
+ !dqm->dev->device_info->needs_iommu_device)
+ qpd->sh_mem_config |=
+ 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+ return 0;
+}
+
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 any more */
+ q->properties.sdma_vm_addr = 0;
+}
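For reference, a standalone worked example of the packing done by compute_sh_mem_bases_64bit() above, using the GFXv9 aperture bases introduced later in this series (LDS at 0x1ULL << 48, scratch at 0x2ULL << 48) and assuming SHARED_BASE occupies bits 31:16 of SH_MEM_BASES, which is what the shift in the function implies:

static uint32_t example_sh_mem_bases_v9(void)
{
	uint64_t lds_base     = 0x1ULL << 48;	    /* MAKE_LDS_APP_BASE_V9() */
	uint64_t scratch_base = 0x2ULL << 48;	    /* MAKE_SCRATCH_APP_BASE_V9() */
	uint32_t shared_base  = lds_base >> 48;	    /* 0x0001 */
	uint32_t private_base = scratch_base >> 48; /* 0x0002 */

	return (shared_base << 16) | private_base;  /* 0x00010002 */
}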
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebb4da14e3df..c3744d89352c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -33,7 +33,6 @@
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
*/
/* # of doorbell bytes allocated for each process. */
-static inline size_t doorbell_process_allocation(void)
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+ return roundup(kfd->device_info->doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
- doorbell_process_allocation();
+ kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;
@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr)
return -ENOMEM;
@@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
iounmap(kfd->doorbell_kernel_ptr);
}
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma)
{
phys_addr_t address;
- struct kfd_dev *dev;
/*
* For simplicity we only allow mapping of the entire doorbell
* allocation of a single device & process.
*/
- if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
- return -EINVAL;
-
- /* Find kfd device according to gpu id */
- dev = kfd_device_by_id(vma->vm_pgoff);
- if (!dev)
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
@@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(dev));
return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
- doorbell_process_allocation(),
+ kfd_doorbell_process_slice(dev),
vma->vm_page_prot);
}
/* get kernel iomem pointer for a doorbell */
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off)
{
u32 inx;
@@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
+ inx *= kfd->device_info->doorbell_size / sizeof(u32);
+
/*
* Calculating the kernel doorbell offset using the first
* doorbell page.
@@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
mutex_unlock(&kfd->doorbell_mutex);
}
-inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+void write_kernel_doorbell(void __iomem *db, u32 value)
{
if (db) {
writel(value, db);
@@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
}
}
-/*
- * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
- * to doorbells with the process's doorbell page
- */
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+ if (db) {
+ WARN(((unsigned long)db & 7) != 0,
+ "Unaligned 64-bit doorbell");
+ writeq(value, (u64 __iomem *)db);
+ pr_debug("writing %llu to doorbell address %p\n", value, db);
+ }
+}
+
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id)
+ unsigned int doorbell_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
- * index * doorbell_process_allocation/sizeof(u32) adjusts to
- * the process's doorbells.
+ * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+ * the process's doorbells. The offset returned is in dword
+ * units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
process->doorbell_index
- * doorbell_process_allocation() / sizeof(u32) +
- queue_id;
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) /
- doorbell_process_allocation() + 1;
+ kfd_doorbell_process_slice(kfd) + 1;
return num_of_elems;
@@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
{
return dev->doorbell_base +
- process->doorbell_index * doorbell_process_allocation();
+ process->doorbell_index * kfd_doorbell_process_slice(dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
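A short standalone model of the dword-unit arithmetic in kfd_doorbell_id_to_offset() above; the helper and the example numbers are illustrative (they assume KFD_MAX_NUM_OF_QUEUES_PER_PROCESS == 1024 and 4 KiB pages, giving a 4 KiB slice for 4-byte doorbells and an 8 KiB slice for 8-byte doorbells):

#include <linux/types.h>

/* Returns the doorbell offset in dword units */
static unsigned int example_doorbell_offset(unsigned int kgd_reserved_dwords,
					    unsigned int process_index,
					    unsigned int doorbell_id,
					    unsigned int doorbell_size,
					    unsigned int slice_bytes)
{
	return kgd_reserved_dwords +
	       process_index * slice_bytes / sizeof(u32) +
	       doorbell_id * doorbell_size / sizeof(u32);
}

/* e.g. 8-byte doorbells, 8 KiB slice, process index 2, doorbell_id 5,
 * nothing reserved by KGD: 0 + 2 * 2048 + 5 * 2 = 4106 dwords.
 * With 4-byte doorbells and a 4 KiB slice the same doorbell sits at
 * 0 + 2 * 1024 + 5 * 1 = 2053 dwords.
 */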
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4890a90f1e44..5562e94e786a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
- *event_page_offset = KFD_MMAP_EVENTS_MASK;
+ *event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
@@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
- if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 66852de410c8..97d5423c5673 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -275,23 +275,35 @@
* for FLAT_* / S_LOAD operations.
*/
-#define MAKE_GPUVM_APP_BASE(gpu_num) \
+#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
#define MAKE_GPUVM_APP_LIMIT(base, size) \
(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
-#define MAKE_SCRATCH_APP_BASE() \
+#define MAKE_SCRATCH_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_LIMIT(base) \
(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
-#define MAKE_LDS_APP_BASE() \
+#define MAKE_LDS_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+/* On GFXv9 the LDS and scratch apertures are programmed independently
+ * using the high 16 bits of the 64-bit virtual address. They must be
+ * in the hole, which will be the case as long as the high 16 bits are
+ * not 0.
+ *
+ * The aperture sizes are still 4GB implicitly.
+ *
+ * A GPUVM aperture is not applicable on GFXv9.
+ */
+#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
+#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
+
/* User mode manages most of the SVM aperture address space. The low
* 16MB are reserved for kernel use (CWSR trap handler and kernel IB
* for now).
@@ -300,6 +312,55 @@
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
+static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
+{
+ /*
+ * node id cannot be 0 - the three MSBs of the
+ * aperture must not be 0
+ */
+ pdd->lds_base = MAKE_LDS_APP_BASE_VI();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ if (!pdd->dev->device_info->needs_iommu_device) {
+ /* dGPUs: SVM aperture starting at 0
+ * with small reserved space for kernel.
+ * Set them to CANONICAL addresses.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+ } else {
+ /* set them to non-CANONICAL addresses, and no SVM is
+ * allocated.
+ */
+ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
+ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
+ pdd->dev->shared_resources.gpuvm_size);
+ }
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
+{
+ pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ /* Raven needs SVM to support graphics handles, etc. Leave the small
+ * reserved space before SVM on Raven as well, even though we don't
+ * have to.
+ * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+ * are used in Thunk to reserve SVM.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
int kfd_init_apertures(struct kfd_process *process)
{
uint8_t id = 0;
@@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process)
struct kfd_process_device *pdd;
/*Iterating over all devices*/
- while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
- id < NUM_OF_SUPPORTED_GPUS) {
-
+ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
if (!dev) {
id++; /* Skip non GPU devices */
continue;
@@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
pr_err("Failed to create process device data\n");
- return -1;
+ return -ENOMEM;
}
/*
* For 64 bit process apertures will be statically reserved in
@@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- /* Same LDS and scratch apertures can be used
- * on all GPUs. This allows using more dGPUs
- * than placement options for apertures.
- */
- pdd->lds_base = MAKE_LDS_APP_BASE();
- pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
-
- pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
- pdd->scratch_limit =
- MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+ switch (dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ kfd_init_apertures_vi(pdd, id);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd_init_apertures_v9(pdd, id);
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dev->device_info->asic_family);
+ return -EINVAL;
+ }
- if (dev->device_info->needs_iommu_device) {
- /* APUs: GPUVM aperture in
- * non-canonical address space
- */
- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
- pdd->gpuvm_base,
- dev->shared_resources.gpuvm_size);
- } else {
- /* dGPUs: SVM aperture starting at 0
- * with small reserved space for kernel
+ if (!dev->device_info->needs_iommu_device) {
+ /* dGPUs: the reserved space for kernel
+ * before SVM
*/
- pdd->gpuvm_base = SVM_USER_BASE;
- pdd->gpuvm_limit =
- dev->shared_resources.gpuvm_size - 1;
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;
}
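Worked expansion of the GFXv9 aperture macros above (straight substitution into MAKE_*_APP_BASE_V9() and MAKE_*_APP_LIMIT()):

/*   lds_base      = MAKE_LDS_APP_BASE_V9()               = 0x0001000000000000
 *   lds_limit     = MAKE_LDS_APP_LIMIT(lds_base)         = 0x00010000FFFFFFFF
 *   scratch_base  = MAKE_SCRATCH_APP_BASE_V9()           = 0x0002000000000000
 *   scratch_limit = MAKE_SCRATCH_APP_LIMIT(scratch_base) = 0x00020000FFFFFFFF
 *
 * Each aperture spans 4 GiB, and both bases have non-zero high 16 bits,
 * so they fall in the non-canonical hole as the comment above requires.
 */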
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
new file mode 100644
index 000000000000..37029baa3346
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+
+
+static bool event_interrupt_isr_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ const uint32_t *data = ih_ring_entry;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+
+ /* If there is no valid PASID, it's likely a firmware bug */
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+ pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, pasid);
+ pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3],
+ data[4], data[5], data[6], data[7]);
+
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+ source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+ source_id == SOC15_INTSRC_CP_BAD_OPCODE;
+}
+
+static void event_interrupt_wq_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ uint32_t context_id;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+ kfd_signal_event_interrupt(pasid, context_id, 32);
+ else if (source_id == SOC15_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
+ else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
+ kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ kfd_signal_hw_exception_event(pasid);
+ else if (client_id == SOC15_IH_CLIENTID_VMC ||
+ client_id == SOC15_IH_CLIENTID_UTCL2) {
+ /* TODO */
+ }
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
+ .interrupt_isr = event_interrupt_isr_v9,
+ .interrupt_wq = event_interrupt_wq_v9,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 035c351f47c5..db6d9336b80d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
+ uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- uint32_t ih_ring_entry[DIV_ROUND_UP(
- dev->device_info->ih_ring_entry_size,
- sizeof(uint32_t))];
+ if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ dev_err_once(kfd_chardev(), "Ring entry too small\n");
+ return;
+ }
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 69f496485331..476951d8c91c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
size_t available_size;
size_t queue_size_dwords;
uint32_t wptr, rptr;
+ uint64_t wptr64;
unsigned int *queue_address;
/* When rptr == wptr, the buffer is empty.
@@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
*/
rptr = *kq->rptr_kernel;
- wptr = *kq->wptr_kernel;
+ wptr = kq->pending_wptr;
+ wptr64 = kq->pending_wptr64;
queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / 4;
@@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* make sure calling functions know
* acquire_packet_buffer() failed
*/
- *buffer_ptr = NULL;
- return -ENOMEM;
+ goto err_no_space;
}
if (wptr + packet_size_in_dwords >= queue_size_dwords) {
/* make sure after rolling back to position 0, there is
* still enough space.
*/
- if (packet_size_in_dwords >= rptr) {
- *buffer_ptr = NULL;
- return -ENOMEM;
- }
+ if (packet_size_in_dwords >= rptr)
+ goto err_no_space;
+
/* fill nops, roll back and start at position 0 */
while (wptr > 0) {
queue_address[wptr] = kq->nop_packet;
wptr = (wptr + 1) % queue_size_dwords;
+ wptr64++;
}
}
*buffer_ptr = &queue_address[wptr];
kq->pending_wptr = wptr + packet_size_in_dwords;
+ kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
return 0;
+
+err_no_space:
+ *buffer_ptr = NULL;
+ return -ENOMEM;
}
static void submit_packet(struct kernel_queue *kq)
@@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
pr_debug("\n");
#endif
- *kq->wptr_kernel = kq->pending_wptr;
- write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr);
+ kq->ops_asic_specific.submit_packet(kq);
}
static void rollback_packet(struct kernel_queue *kq)
{
- kq->pending_wptr = *kq->queue->properties.write_ptr;
+ if (kq->dev->device_info->doorbell_size == 8) {
+ kq->pending_wptr64 = *kq->wptr64_kernel;
+ kq->pending_wptr = *kq->wptr_kernel %
+ (kq->queue->properties.queue_size / 4);
+ } else {
+ kq->pending_wptr = *kq->wptr_kernel;
+ }
}
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kernel_queue_init_v9(&kq->ops_asic_specific);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
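The 64-bit pending write pointer added above never wraps; only the 32-bit dword index into the ring buffer does. A worked trace of the roll-over path in acquire_packet_buffer(), with illustrative numbers:

/* Ring of 1024 dwords, wptr == 1020, wptr64 == 5116, incoming packet of
 * 8 dwords, rptr well past the wrap point:
 *   - 4 NOPs are written at dwords 1020..1023; wptr wraps to 0 and
 *     wptr64 advances to 5120
 *   - the packet is built starting at dword 0
 *   - pending_wptr   = 0 + 8    = 8     (ring-relative, wraps)
 *   - pending_wptr64 = 5120 + 8 = 5128  (monotonic, written to the
 *     64-bit doorbell by submit_packet_v9())
 */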
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 594053136ee4..97aff2041a5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -72,6 +72,7 @@ struct kernel_queue {
struct kfd_dev *dev;
struct mqd_manager *mqd;
struct queue *queue;
+ uint64_t pending_wptr64;
uint32_t pending_wptr;
unsigned int nop_packet;
@@ -79,7 +80,10 @@ struct kernel_queue {
uint32_t *rptr_kernel;
uint64_t rptr_gpu_addr;
struct kfd_mem_obj *wptr_mem;
- uint32_t *wptr_kernel;
+ union {
+ uint64_t *wptr64_kernel;
+ uint32_t *wptr_kernel;
+ };
uint64_t wptr_gpu_addr;
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
@@ -97,5 +101,6 @@ struct kernel_queue {
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+void kernel_queue_init_v9(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
index a90eb440b1fb..19e54acb4125 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -26,11 +26,13 @@
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
+static void submit_packet_cik(struct kernel_queue *kq);
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_cik;
ops->uninitialize = uninitialize_cik;
+ ops->submit_packet = submit_packet_cik;
}
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
static void uninitialize_cik(struct kernel_queue *kq)
{
}
+
+static void submit_packet_cik(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
new file mode 100644
index 000000000000..684a3bf07efd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_opcodes.h"
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_v9(struct kernel_queue *kq);
+static void submit_packet_v9(struct kernel_queue *kq);
+
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+ ops->initialize = initialize_v9;
+ ops->uninitialize = uninitialize_v9;
+ ops->submit_packet = submit_packet_v9;
+}
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ int retval;
+
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval)
+ return false;
+
+ kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+ kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+ memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+ return true;
+}
+
+static void uninitialize_v9(struct kernel_queue *kq)
+{
+ kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
+
+static void submit_packet_v9(struct kernel_queue *kq)
+{
+ *kq->wptr64_kernel = kq->pending_wptr64;
+ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr64);
+}
+
+static int pm_map_process_v9(struct packet_manager *pm,
+ uint32_t *buffer, struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+ uint64_t vm_page_table_base_addr =
+ (uint64_t)(qpd->page_table_base) << 12;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields14.gds_size = qpd->gds_size;
+ packet->bitfields14.num_gws = qpd->num_gws;
+ packet->bitfields14.num_oac = qpd->num_oac;
+ packet->bitfields14.sdma_enable = 1;
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+ packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ packet->vm_context_page_table_base_addr_lo32 =
+ lower_32_bits(vm_page_table_base_addr);
+ packet->vm_context_page_table_base_addr_hi32 =
+ upper_32_bits(vm_page_table_base_addr);
+
+ return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ /* Determine the number of processes to map together to HW:
+ * it cannot exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(struct pm4_mec_release_mem));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
+
+ packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+ .map_process = pm_map_process_v9,
+ .runlist = pm_runlist_v9,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_v9,
+ .unmap_queues = pm_unmap_queues_v9,
+ .query_status = pm_query_status_v9,
+ .release_mem = pm_release_mem_v9,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
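All of the builders above size their packets through pm_build_pm4_header(), added to kfd_kernel_queue_vi.c below, which encodes the usual PM4 type-3 count of total dwords minus two. A worked example with an assumed 24-byte packet:

/* Hypothetical 24-byte (6-dword) PM4 packet:
 *   header.count = 24 / 4 - 2 = 4
 * i.e. one header dword followed by count + 1 = 5 payload dwords.
 */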
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index f1d48281e322..bf20c6d32ef3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -22,15 +22,20 @@
*/
#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
+static void submit_packet_vi(struct kernel_queue *kq);
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_vi;
ops->uninitialize = uninitialize_vi;
+ ops->submit_packet = submit_packet_vi;
}
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
+
+static void submit_packet_vi(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
+
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+ union PM4_MES_TYPE_3_HEADER header;
+
+ header.u32All = 0;
+ header.opcode = opcode;
+ header.count = packet_size / 4 - 2;
+ header.type = PM4_TYPE_3;
+
+ return header.u32All;
+}
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields3.page_table_base = qpd->page_table_base;
+ packet->bitfields10.gds_size = qpd->gds_size;
+ packet->bitfields10.num_gws = qpd->num_gws;
+ packet->bitfields10.num_oac = qpd->num_oac;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+ packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ if (WARN_ON(!ib))
+ return -EFAULT;
+
+ /* Determine the number of processes to map together to HW:
+ * it cannot exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
+{
+ struct pm4_mes_set_resources *packet;
+
+ packet = (struct pm4_mes_set_resources *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+ packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+ sizeof(struct pm4_mes_set_resources));
+
+ packet->bitfields2.queue_type =
+ queue_type__mes_set_resources__hsa_interface_queue_hiq;
+ packet->bitfields2.vmid_mask = res->vmid_mask;
+ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ packet->bitfields7.oac_mask = res->oac_mask;
+ packet->bitfields8.gds_heap_base = res->gds_heap_base;
+ packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+ packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+ packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+ packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+ packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+ return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(*packet));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(*packet));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ packet->bitfields2.atc = 0;
+
+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel___release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
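The address split above stores a dword-aligned GPU virtual address; a minimal sketch of the round trip (the address value is made up):

    uint64_t gpu_addr = 0x0000001234567890ULL;              /* 4-byte aligned */
    uint32_t lo_32b   = (gpu_addr & 0xffffffff) >> 2;       /* 30-bit field, dword units */
    uint32_t hi       = upper_32_bits(gpu_addr);
    uint64_t check    = ((uint64_t)hi << 32) | ((uint64_t)lo_32b << 2);
    /* check == gpu_addr, since the two lowest address bits were zero */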
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+ .map_process = pm_map_process_vi,
+ .runlist = pm_runlist_vi,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_vi,
+ .unmap_queues = pm_unmap_queues_vi,
+ .query_status = pm_query_status_vi,
+ .release_mem = pm_release_mem_vi,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
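For reference, every packet built by these functions starts with the PM4 type-3 header defined in kfd_pm4_headers_ai.h later in this patch, with the count field holding the packet length in dwords minus two (the same rule as the build_pm4_header() helper removed from kfd_packet_manager.c below). A short sketch, assuming pm4_mes_runlist packs to 4 dwords as laid out in that header:

    union PM4_MES_TYPE_3_HEADER header;

    header.u32All = 0;
    header.opcode = IT_RUN_LIST;                             /* IT opcode of the packet */
    header.count  = sizeof(struct pm4_mes_runlist) / 4 - 2;  /* 16/4 - 2 = 2 */
    header.type   = PM4_TYPE_3;                              /* type-3 identifier */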
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index e0c07d24d251..76bf2dc8aec4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
+ .quiesce_mm = kgd2kfd_quiesce_mm,
+ .resume_mm = kgd2kfd_resume_mm,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
@@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+int vega10_noretry;
+module_param_named(noretry, vega10_noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index ee7061e1c466..4b8eb506642b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ return mqd_manager_init_v9(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index c00c325ed3c9..06eaa218eba6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
m->cp_mqd_base_addr_hi = upper_32_bits(addr);
- m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
- /* Although WinKFD writes this, I suspect it should not be necessary */
- m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-
m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
QUANTUM_DURATION(10);
@@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
new file mode 100644
index 000000000000..684054ff02cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct v9_mqd *m;
+ struct kfd_dev *kfd = mm->dev;
+
+ /* From GFX9 onwards, for CWSR, the control stack is located on the next
+ * page boundary after the MQD, so we use the GTT allocation function
+ * instead of the sub-allocation function.
+ */
+ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+ *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+ if (!*mqd_mem_obj)
+ return -ENOMEM;
+ retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+ ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+ &((*mqd_mem_obj)->gtt_mem),
+ &((*mqd_mem_obj)->gpu_addr),
+ (void *)&((*mqd_mem_obj)->cpu_ptr));
+ } else {
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
+ mqd_mem_obj);
+ }
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, sizeof(struct v9_mqd));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ if (q->tba_addr) {
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+ }
+
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
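For orientation, the size handed to init_gtt_mem_allocation() above covers both pieces of the CWSR layout; a sketch with example sizes (4 KiB pages, an 8 KiB control stack, and a v9 MQD that fits in one page):

    size_t mqd_part   = ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);  /* 4 KiB here */
    size_t stack_part = ALIGN(q->ctl_stack_size, PAGE_SIZE);      /* 8 KiB here */
    size_t total      = stack_part + mqd_part;                    /* 12 KiB allocated */
    /* MQD at the start, control stack beginning on the next page boundary,
     * as the comment at the top of init_mqd() states.
     */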
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, 0, mms);
+}
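The wptr_shift above only changes the unit of the write pointer the HQD sees: an AQL write pointer counts 64-byte packets while CP counts dwords, so a shift of 4 is a multiply by 16. A tiny sketch with a made-up pointer value:

    uint64_t aql_wptr   = 10;                     /* 10 AQL packets of 64 bytes */
    uint32_t wptr_shift = 4;                      /* KFD_QUEUE_FORMAT_AQL case above */
    uint64_t dwords     = aql_wptr << wptr_shift; /* 160 dwords == 640 bytes */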
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control =
+ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+ 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ order_base_2(q->eop_ring_buffer_size / 4) - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ m->cp_hqd_ctx_save_control = 0;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
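A worked instance of the EOP sizing rule in the comment above, for a hypothetical 4 KiB EOP ring buffer:

    uint32_t eop_bytes  = 4096;                                   /* example only */
    uint32_t eop_dwords = eop_bytes / 4;                          /* 1024 dwords */
    int      eop_ctl    = min(0xA, order_base_2(eop_dwords) - 1); /* min(10, 9) == 9 */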
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy
+ (mm->dev->kgd, mqd, type, timeout,
+ pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ struct kfd_dev *kfd = mm->dev;
+
+ if (mqd_mem_obj->gtt_mem) {
+ kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = update_mqd(mm, mqd, q);
+
+ if (retval != 0)
+ return retval;
+
+ /* TODO: what's the point? update_mqd already does this. */
+ m = get_mqd(mqd);
+ m->cp_hqd_vmid = q->vmid;
+ return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ struct v9_sdma_mqd *m;
+
+ retval = kfd_gtt_sa_allocate(mm->dev,
+ sizeof(struct v9_sdma_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+ memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+ << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
+
+/*
+ * The preempt type is ignored here because there is only one way
+ * to preempt an SDMA queue.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->load_mqd = load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = destroy_mqd_sdma;
+ mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
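A minimal usage sketch, assuming a struct kfd_dev *dev for a Vega10/Raven device, a struct queue *q whose properties are already filled in, and a pipe_id/queue_id chosen by the device queue manager; the dispatch is the mqd_manager_init() hunk earlier in this patch and error handling is elided:

    struct mqd_manager *mm = mqd_manager_init(KFD_MQD_TYPE_COMPUTE, dev);
    struct kfd_mem_obj *mqd_mem_obj;
    uint64_t gart_addr;
    void *mqd;

    mm->init_mqd(mm, &mqd, &mqd_mem_obj, &gart_addr, &q->properties);
    mm->load_mqd(mm, mqd, pipe_id, queue_id, &q->properties, current->mm);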
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242e43e7..481307b8b4db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 89ba4c670ec5..c317feb43f69 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,8 +26,6 @@
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
-#include "kfd_pm4_headers_vi.h"
-#include "kfd_pm4_opcodes.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
@@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
*wptr = temp;
}
-static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
-{
- union PM4_MES_TYPE_3_HEADER header;
-
- header.u32All = 0;
- header.opcode = opcode;
- header.count = packet_size / 4 - 2;
- header.type = PM4_TYPE_3;
-
- return header.u32All;
-}
-
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
bool *over_subscription)
@@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
pr_debug("Over subscribed runlist\n");
}
- map_queue_size = sizeof(struct pm4_mes_map_queues);
+ map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
- *rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
+ *rlib_size = process_count * pm->pmf->map_process_size +
queue_count * map_queue_size;
/*
@@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* when over subscription
*/
if (*over_subscription)
- *rlib_size += sizeof(struct pm4_mes_runlist);
+ *rlib_size += pm->pmf->runlist_size;
pr_debug("runlist ib size %d\n", *rlib_size);
}
@@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ mutex_lock(&pm->lock);
+
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
&pm->ib_buffer_obj);
if (retval) {
pr_err("Failed to allocate runlist IB\n");
- return retval;
+ goto out;
}
*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
@@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
memset(*rl_buffer, 0, *rl_buffer_size);
pm->allocated = true;
- return retval;
-}
-
-static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
- uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
- struct pm4_mes_runlist *packet;
- int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
-
- if (WARN_ON(!ib))
- return -EFAULT;
-
- /* Determine the number of processes to map together to HW:
- * it can not exceed the number of VMIDs available to the
- * scheduler, and it is determined by the smaller of the number
- * of processes in the runlist and kfd module parameter
- * hws_max_conc_proc.
- * Note: the arbitration between the number of VMIDs and
- * hws_max_conc_proc has been done in
- * kgd2kfd_device_init().
- */
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
-
- packet = (struct pm4_mes_runlist *)buffer;
-
- memset(buffer, 0, sizeof(struct pm4_mes_runlist));
- packet->header.u32All = build_pm4_header(IT_RUN_LIST,
- sizeof(struct pm4_mes_runlist));
-
- packet->bitfields4.ib_size = ib_size_in_dwords;
- packet->bitfields4.chain = chain ? 1 : 0;
- packet->bitfields4.offload_polling = 0;
- packet->bitfields4.valid = 1;
- packet->bitfields4.process_cnt = concurrent_proc_cnt;
- packet->ordinal2 = lower_32_bits(ib);
- packet->bitfields3.ib_base_hi = upper_32_bits(ib);
-
- return 0;
-}
-
-static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
- struct qcm_process_device *qpd)
-{
- struct pm4_mes_map_process *packet;
-
- packet = (struct pm4_mes_map_process *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
- packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
- sizeof(struct pm4_mes_map_process));
- packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
- packet->bitfields3.page_table_base = qpd->page_table_base;
- packet->bitfields10.gds_size = qpd->gds_size;
- packet->bitfields10.num_gws = qpd->num_gws;
- packet->bitfields10.num_oac = qpd->num_oac;
- packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
- packet->sh_mem_config = qpd->sh_mem_config;
- packet->sh_mem_bases = qpd->sh_mem_bases;
- packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
- packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
-
- packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
-
- packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
- packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
- return 0;
-}
-
-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q, bool is_static)
-{
- struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
-
- packet = (struct pm4_mes_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
- packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe_vi;
- packet->bitfields2.num_queues = 1;
- packet->bitfields2.queue_sel =
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_compute_vi;
-
- switch (q->properties.type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_latency_static_queue_vi;
- break;
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__debug_interface_queue_vi;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
- engine_sel__mes_map_queues__sdma0_vi;
- use_static = false; /* no static queues under SDMA */
- break;
- default:
- WARN(1, "queue type %d", q->properties.type);
- return -EINVAL;
- }
- packet->bitfields3.doorbell_offset =
- q->properties.doorbell_off;
-
- packet->mqd_addr_lo =
- lower_32_bits(q->gart_mqd_addr);
-
- packet->mqd_addr_hi =
- upper_32_bits(q->gart_mqd_addr);
-
- packet->wptr_addr_lo =
- lower_32_bits((uint64_t)q->properties.write_ptr);
-
- packet->wptr_addr_hi =
- upper_32_bits((uint64_t)q->properties.write_ptr);
-
- return 0;
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
}
static int pm_create_runlist_ib(struct packet_manager *pm,
@@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return -ENOMEM;
}
- retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+ retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval)
return retval;
proccesses_mapped++;
- inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
+ inc_wptr(&rl_wptr, pm->pmf->map_process_size,
alloc_size_bytes);
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
@@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
kq->queue,
qpd->is_debug);
@@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
@@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
q,
qpd->is_debug);
@@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
}
@@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("Finished map process and queues to runlist\n");
if (is_over_subscription)
- retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+ retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
@@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
-/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
- * of this packet
- * @gpu_addr - GPU address of the packet. It's a virtual address.
- * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
- * Return - length of the packet
- */
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
-{
- struct pm4_mec_release_mem *packet;
-
- WARN_ON(!buffer);
-
- packet = (struct pm4_mec_release_mem *)buffer;
- memset(buffer, 0, sizeof(*packet));
-
- packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
- sizeof(*packet));
-
- packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
- packet->bitfields2.tcl1_action_ena = 1;
- packet->bitfields2.tc_action_ena = 1;
- packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- packet->bitfields2.atc = 0;
-
- packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
- packet->bitfields3.int_sel =
- int_sel___release_mem__send_interrupt_after_write_confirm;
-
- packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
- packet->address_hi = upper_32_bits(gpu_addr);
-
- packet->data_lo = 0;
-
- return sizeof(*packet) / sizeof(unsigned int);
-}
-
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
+ switch (dqm->dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ /* PM4 packet structures on CIK are the same as on VI */
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ pm->pmf = &kfd_vi_pm_funcs;
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ pm->pmf = &kfd_v9_pm_funcs;
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->device_info->asic_family);
+ return -EINVAL;
+ }
+
pm->dqm = dqm;
mutex_init(&pm->lock);
pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
@@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
- struct pm4_mes_set_resources *packet;
+ uint32_t *buffer, size;
int retval = 0;
+ size = pm->pmf->set_resources_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
- sizeof(*packet) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (!packet) {
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+ if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- memset(packet, 0, sizeof(struct pm4_mes_set_resources));
- packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
- sizeof(struct pm4_mes_set_resources));
-
- packet->bitfields2.queue_type =
- queue_type__mes_set_resources__hsa_interface_queue_hiq;
- packet->bitfields2.vmid_mask = res->vmid_mask;
- packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
- packet->bitfields7.oac_mask = res->oac_mask;
- packet->bitfields8.gds_heap_base = res->gds_heap_base;
- packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
- packet->gws_mask_lo = lower_32_bits(res->gws_mask);
- packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
- packet->queue_mask_lo = lower_32_bits(res->queue_mask);
- packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->set_resources(pm, buffer, res);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
- packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
+ packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
@@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_acquire_packet_buffer;
- retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
+ retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
rl_ib_size / sizeof(uint32_t), false);
if (retval)
goto fail_create_runlist;
@@ -499,37 +330,29 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint32_t fence_value)
{
- int retval;
- struct pm4_mes_query_status *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
if (WARN_ON(!fence_address))
return -EFAULT;
+ size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (retval)
- goto fail_acquire_packet_buffer;
-
- packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
- sizeof(struct pm4_mes_query_status));
-
- packet->bitfields2.context_id = 0;
- packet->bitfields2.interrupt_sel =
- interrupt_sel__mes_query_status__completion_status;
- packet->bitfields2.command =
- command__mes_query_status__fence_only_after_write_ack;
-
- packet->addr_hi = upper_32_bits((uint64_t)fence_address);
- packet->addr_lo = lower_32_bits((uint64_t)fence_address);
- packet->data_hi = upper_32_bits((uint64_t)fence_value);
- packet->data_lo = lower_32_bits((uint64_t)fence_value);
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-fail_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
@@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
uint32_t filter_param, bool reset,
unsigned int sdma_engine)
{
- int retval;
- uint32_t *buffer;
- struct pm4_mes_unmap_queues *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
+ size = pm->pmf->unmap_queues_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
- &buffer);
- if (retval)
- goto err_acquire_packet_buffer;
-
- packet = (struct pm4_mes_unmap_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
- pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
- filter, reset, type);
- packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
- sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- retval = -EINVAL;
- goto err_invalid;
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
}
- if (reset)
- packet->bitfields2.action =
- action__mes_unmap_queues__reset_queues;
+ retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
+ reset, sdma_engine);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
else
- packet->bitfields2.action =
- action__mes_unmap_queues__preempt_queues;
-
- switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
- packet->bitfields3a.pasid = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_queues;
- break;
- case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
- /* in this case, we do not preempt static queues */
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
- break;
- default:
- WARN(1, "filter %d", filter);
- retval = -EINVAL;
- goto err_invalid;
- }
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
-
- mutex_unlock(&pm->lock);
- return 0;
-
-err_invalid:
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-err_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
new file mode 100644
index 000000000000..f2bcf5c092ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -0,0 +1,583 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+ uint32_t count : 14;/* < number of DWORDs - 1 in the
+ * information body.
+ */
+ uint32_t type : 2; /* < packet identifier.
+ * It should be 3 for type 3 packets
+ */
+ };
+ uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+ queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t vmid_mask:16;
+ uint32_t unmap_latency:8;
+ uint32_t reserved1:5;
+ enum mes_set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t queue_mask_lo;
+ uint32_t queue_mask_hi;
+ uint32_t gws_mask_lo;
+ uint32_t gws_mask_hi;
+
+ union {
+ struct {
+ uint32_t oac_mask:16;
+ uint32_t reserved2:16;
+ } bitfields7;
+ uint32_t ordinal7;
+ };
+
+ union {
+ struct {
+ uint32_t gds_heap_base:6;
+ uint32_t reserved3:5;
+ uint32_t gds_heap_size:6;
+ uint32_t reserved4:15;
+ } bitfields8;
+ uint32_t ordinal8;
+ };
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:2;
+ uint32_t ib_base_lo:30;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t ib_base_hi;
+
+ union {
+ struct {
+ uint32_t ib_size:20;
+ uint32_t chain:1;
+ uint32_t offload_polling:1;
+ uint32_t reserved2:1;
+ uint32_t valid:1;
+ uint32_t process_cnt:4;
+ uint32_t reserved3:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t vm_context_page_table_base_addr_lo32;
+
+ uint32_t vm_context_page_table_base_addr_hi32;
+
+ uint32_t sh_mem_bases;
+
+ uint32_t sh_mem_config;
+
+ uint32_t sq_shader_tba_lo;
+
+ uint32_t sq_shader_tba_hi;
+
+ uint32_t sq_shader_tma_lo;
+
+ uint32_t sq_shader_tma_hi;
+
+ uint32_t reserved6;
+
+ uint32_t gds_addr_lo;
+
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:6;
+ uint32_t reserved7:1;
+ uint32_t sdma_enable:1;
+ uint32_t num_oac:4;
+ uint32_t reserved8:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields14;
+ uint32_t ordinal14;
+ };
+
+ uint32_t completion_signal_lo;
+
+ uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_PROCESS_VM--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
+#define PM4_MES_MAP_PROCESS_VM_DEFINED
+
+struct PM4_MES_MAP_PROCESS_VM {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ uint32_t reserved1;
+
+ uint32_t vm_context_cntl;
+
+ uint32_t reserved2;
+
+ uint32_t vm_context_page_table_end_addr_lo32;
+
+ uint32_t vm_context_page_table_end_addr_hi32;
+
+ uint32_t vm_context_page_table_start_addr_lo32;
+
+ uint32_t vm_context_page_table_start_addr_hi32;
+
+ uint32_t reserved3;
+
+ uint32_t reserved4;
+
+ uint32_t reserved5;
+
+ uint32_t reserved6;
+
+ uint32_t reserved7;
+
+ uint32_t reserved8;
+
+ uint32_t completion_signal_lo32;
+
+ uint32_t completion_signal_hi32;
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_enum {
+ queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_enum {
+ queue_type__mes_map_queues__normal_compute_vi = 0,
+ queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+ queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+ queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_enum {
+ alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+ alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_enum {
+ engine_sel__mes_map_queues__compute_vi = 0,
+ engine_sel__mes_map_queues__sdma0_vi = 2,
+ engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:4;
+ enum mes_map_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:15;
+ enum mes_map_queues_queue_type_enum queue_type:3;
+ enum mes_map_queues_alloc_format_enum alloc_format:2;
+ enum mes_map_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved3:1;
+ uint32_t check_disable:1;
+ uint32_t doorbell_offset:26;
+ uint32_t reserved4:4;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t mqd_addr_lo;
+ uint32_t mqd_addr_hi;
+ uint32_t wptr_addr_lo;
+ uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+ interrupt_sel__mes_query_status__completion_status = 0,
+ interrupt_sel__mes_query_status__process_status = 1,
+ interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+ command__mes_query_status__interrupt_only = 0,
+ command__mes_query_status__fence_only_immediate = 1,
+ command__mes_query_status__fence_only_after_write_ack = 2,
+ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+ engine_sel__mes_query_status__compute = 0,
+ engine_sel__mes_query_status__sdma0_queue = 2,
+ engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t context_id:28;
+ enum mes_query_status_interrupt_sel_enum interrupt_sel:2;
+ enum mes_query_status_command_enum command:2;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved2:2;
+ uint32_t doorbell_offset:26;
+ enum mes_query_status_engine_sel_enum engine_sel:3;
+ uint32_t reserved3:1;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data_lo;
+ uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+ action__mes_unmap_queues__preempt_queues = 0,
+ action__mes_unmap_queues__reset_queues = 1,
+ action__mes_unmap_queues__disable_process_queues = 2,
+ action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+ engine_sel__mes_unmap_queues__compute = 0,
+ engine_sel__mes_unmap_queues__sdma0 = 2,
+ engine_sel__mes_unmap_queues__sdma1 = 3
+};
+
+struct pm4_mes_unmap_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ enum mes_unmap_queues_action_enum action:2;
+ uint32_t reserved1:2;
+ enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:20;
+ enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved3:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved4:2;
+ uint32_t doorbell_offset0:26;
+ uint32_t reserved5:4;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved6:2;
+ uint32_t doorbell_offset1:26;
+ uint32_t reserved7:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+ union {
+ struct {
+ uint32_t reserved8:2;
+ uint32_t doorbell_offset2:26;
+ uint32_t reserved9:4;
+ } bitfields5;
+ uint32_t ordinal5;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ uint32_t doorbell_offset3:26;
+ uint32_t reserved11:4;
+ } bitfields6;
+ uint32_t ordinal6;
+ };
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+ event_index__mec_release_mem__end_of_pipe = 5,
+ event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+ cache_policy__mec_release_mem__lru = 0,
+ cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+ pq_exe_status__mec_release_mem__default = 0,
+ pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+ dst_sel__mec_release_mem__memory_controller = 0,
+ dst_sel__mec_release_mem__tc_l2 = 1,
+ dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+ dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+ int_sel__mec_release_mem__none = 0,
+ int_sel__mec_release_mem__send_interrupt_only = 1,
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+ int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
+};
+
+enum mec_release_mem_data_sel_enum {
+ data_sel__mec_release_mem__none = 0,
+ data_sel__mec_release_mem__send_32_bit_low = 1,
+ data_sel__mec_release_mem__send_64_bit_data = 2,
+ data_sel__mec_release_mem__send_gpu_clock_counter = 3,
+ data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel__mec_release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum mec_release_mem_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ uint32_t reserved3:1;
+ uint32_t tc_nc_action_ena:1;
+ uint32_t tc_wc_action_ena:1;
+ uint32_t tc_md_action_ena:1;
+ uint32_t reserved4:3;
+ enum mec_release_mem_cache_policy_enum cache_policy:2;
+ uint32_t reserved5:2;
+ enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
+ uint32_t reserved6:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved7:16;
+ enum mec_release_mem_dst_sel_enum dst_sel:2;
+ uint32_t reserved8:6;
+ enum mec_release_mem_int_sel_enum int_sel:3;
+ uint32_t reserved9:2;
+ enum mec_release_mem_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ uint32_t reserved11:3;
+ uint32_t address_lo_64b:29;
+ } bitfields4b;
+ uint32_t reserved12;
+ unsigned int ordinal4;
+ };
+
+ union {
+ uint32_t address_hi;
+ uint32_t reserved13;
+ uint32_t ordinal5;
+ };
+
+ union {
+ uint32_t data_lo;
+ uint32_t cmp_data_lo;
+ struct {
+ uint32_t dw_offset:16;
+ uint32_t num_dwords:16;
+ } bitfields6c;
+ uint32_t reserved14;
+ uint32_t ordinal6;
+ };
+
+ union {
+ uint32_t data_hi;
+ uint32_t cmp_data_hi;
+ uint32_t reserved15;
+ uint32_t reserved16;
+ uint32_t ordinal7;
+ };
+
+ uint32_t int_ctxid;
+
+};
+
+#endif
+
+enum {
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+#endif
+
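All of the structs above follow the same pattern: each dword is a union of a raw ordinalN word and a named bitfield view, so a packet can be filled through the bitfields and dumped through the ordinals. A small sketch using pm4_mes_set_resources (the mask value is arbitrary):

    struct pm4_mes_set_resources pkt = {0};

    pkt.bitfields2.vmid_mask  = 0xFF00;        /* example: upper eight VMIDs */
    pkt.bitfields2.queue_type =
        queue_type__mes_set_resources__hsa_interface_queue_hiq;

    /* The same dword viewed as a raw word, handy for dumping the packet. */
    pr_debug("ordinal2 = 0x%08x\n", pkt.ordinal2);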
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 96a9cc0f02c9..5e3990bb4c4b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -39,11 +39,37 @@
#include "amd_shared.h"
+#define KFD_MAX_RING_ENTRY_SIZE 8
+
#define KFD_SYSFS_FILE_MODE 0444
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
-#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id. To identify which GPU the offset belongs to
+ * BITS[45:0] - MMAP offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
+ * defines are w.r.t to PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+ << KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+ & KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
+ >> KFD_MMAP_GPU_ID_SHIFT)
+
+#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
+#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
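A sketch of how these macros combine and split an offset; the gpu_id is hypothetical, and per the NOTE above the value is already in pages, i.e. directly comparable with vm_pgoff:

    /* Encode a doorbell mapping for gpu_id 0x1234 (made-up id). */
    uint64_t pgoff = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0x1234);

    /* Decode, e.g. in the mmap handler: */
    uint32_t gpu_id    = KFD_MMAP_GPU_ID_GET(pgoff);                     /* 0x1234 */
    bool is_doorbell   = (pgoff & KFD_MMAP_TYPE_MASK) == KFD_MMAP_TYPE_DOORBELL;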
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -55,9 +81,6 @@
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0
-/* GPU ID hash width in bits */
-#define KFD_GPU_ID_HASH_WIDTH 16
-
/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) \
((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
@@ -116,6 +139,11 @@ extern int debug_largebar;
*/
extern int ignore_crat;
+/*
+ * Set sh_mem_config.retry_disable on Vega10
+ */
+extern int vega10_noretry;
+
/**
* enum kfd_sched_policy
*
@@ -148,6 +176,8 @@ enum cache_policy {
cache_policy_noncoherent
};
+#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
@@ -160,6 +190,7 @@ struct kfd_device_info {
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
unsigned int max_no_of_hqd;
+ unsigned int doorbell_size;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -173,6 +204,7 @@ struct kfd_mem_obj {
uint32_t range_end;
uint64_t gpu_addr;
uint32_t *cpu_ptr;
+ void *gtt_mem;
};
struct kfd_vmid_info {
@@ -364,7 +396,7 @@ struct queue_properties {
uint32_t queue_percent;
uint32_t *read_ptr;
uint32_t *write_ptr;
- uint32_t __iomem *doorbell_ptr;
+ void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
@@ -427,6 +459,7 @@ struct queue {
uint32_t queue;
unsigned int sdma_id;
+ unsigned int doorbell_id;
struct kfd_process *process;
struct kfd_dev *device;
@@ -501,6 +534,9 @@ struct qcm_process_device {
/* IB memory */
uint64_t ib_base;
void *ib_kaddr;
+
+ /* doorbell resources per process per device */
+ unsigned long *doorbell_bitmap;
};
/* KFD Memory Eviction */
@@ -512,6 +548,8 @@ struct qcm_process_device {
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
@@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
@@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */
@@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);
/* Doorbells */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma);
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
-void write_kernel_doorbell(u32 __iomem *db, u32 value);
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell(void __iomem *db, u32 value);
+void write_kernel_doorbell64(void __iomem *db, u64 value);
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id);
+ unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
@@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -832,8 +877,42 @@ struct packet_manager {
bool allocated;
struct kfd_mem_obj *ib_buffer_obj;
unsigned int ib_size_bytes;
+
+ const struct packet_manager_funcs *pmf;
+};
+
+struct packet_manager_funcs {
+ /* Support ASIC-specific packet formats for PM4 packets */
+ int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd);
+ int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain);
+ int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
+ int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static);
+ int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter mode,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine);
+ int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value);
+ int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+ /* Packet sizes */
+ int map_process_size;
+ int runlist_size;
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+ int release_mem_size;
};
+extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+
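As a hedged illustration of how this ops table is meant to be used, a hypothetical caller is sketched below; pm_build_map_queues_example() is not part of the patch and only assumes pm->pmf was set to kfd_vi_pm_funcs or kfd_v9_pm_funcs during pm_init():

/* Illustration only: build an ASIC-specific MAP_QUEUES packet. */
static int pm_build_map_queues_example(struct packet_manager *pm,
				       uint32_t *buffer, struct queue *q)
{
	/* The per-ASIC packet size tells callers how much ring space to
	 * reserve before asking the ops table to fill in the packet.
	 */
	if (!pm->pmf || !pm->pmf->map_queues || pm->pmf->map_queues_size <= 0)
		return -EINVAL;
	return pm->pmf->map_queues(pm, buffer, q, false);
}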
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
@@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+/* The following PM functions can be shared between VI and AI */
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+
extern const struct kfd_device_global_init_class device_global_init_class_cik;
void kfd_event_init_process(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1711ad0642f7..1d80b4f7c681 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
+ kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfree(pdd);
@@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
- offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+ offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
+ << PAGE_SHIFT;
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);
@@ -585,6 +587,31 @@ err_alloc_process:
return ERR_PTR(err);
}
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+ struct kfd_dev *dev)
+{
+ unsigned int i;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family))
+ return 0;
+
+ qpd->doorbell_bitmap =
+ kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ BITS_PER_BYTE), GFP_KERNEL);
+ if (!qpd->doorbell_bitmap)
+ return -ENOMEM;
+
+ /* Mask out any reserved doorbells */
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
+ if ((dev->shared_resources.reserved_doorbell_mask & i) ==
+ dev->shared_resources.reserved_doorbell_val) {
+ set_bit(i, qpd->doorbell_bitmap);
+ pr_debug("reserved doorbell 0x%03x\n", i);
+ }
+
+ return 0;
+}
+
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
{
@@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
+ if (init_doorbell_bitmap(&pdd->qpd, dev)) {
+ pr_err("Failed to init doorbell for process\n");
+ kfree(pdd);
+ return NULL;
+ }
+
pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-static int process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r = 0;
@@ -844,7 +877,7 @@ fail:
}
/* kfd_process_restore_queues - Restore all user queues of a process */
-static int process_restore_queues(struct kfd_process *p)
+int kfd_process_restore_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r, ret = 0;
@@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid %d\n", p->pasid);
- ret = process_evict_queues(p);
+ ret = kfd_process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work)
return;
}
- ret = process_restore_queues(p);
+ ret = kfd_process_restore_queues(p);
if (!ret)
pr_debug("Finished restoring pasid %d\n", p->pasid);
else
@@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (process_evict_queues(p))
+ if (kfd_process_evict_queues(p))
pr_err("Failed to suspend process %d\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void)
return ret;
}
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
- struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
struct kfd_process_device *pdd;
struct qcm_process_device *qpd;
- if (!dev)
- return -EINVAL;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
pr_err("Incorrect CWSR mapping size.\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7817e327ea6d..d65ce0436b31 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
- q_properties->doorbell_off =
- kfd_queue_id_to_doorbell(dev, pqm->process, qid);
-
/* let DQM handle it*/
q_properties->vmid = 0;
q_properties->queue_id = qid;
@@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("DQM create queue failed\n");
+ pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+ pqm->process->pasid, type, retval);
goto err_create_queue;
}
+ if (q)
+		/* Return the doorbell offset, in bytes, within the
+		 * doorbell page to the caller so it can be passed up
+		 * to user mode.
+		 */
+ properties->doorbell_off =
+ (q->properties.doorbell_off * sizeof(uint32_t)) &
+ (kfd_doorbell_process_slice(dev) - 1);
+
pr_debug("PQM After DQM create queue\n");
list_add(&pqn->process_queue_list, &pqm->queues);
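A worked example of the byte-offset computation in this hunk, using illustrative numbers; a 4 KiB per-process doorbell slice is assumed here, as on pre-SOC15 parts (SOC15 slices are larger):

/* Illustration only: doorbell_off is a dword index into the process's
 * doorbell pages; the result is a byte offset within the slice.
 */
uint32_t doorbell_off = 0x403;		/* dword index                    */
size_t slice = 4096;			/* kfd_doorbell_process_slice()   */
uint32_t byte_off = (doorbell_off * sizeof(uint32_t)) & (slice - 1);
/* byte_off == (0x403 * 4) & 0xfff == 0x100c & 0xfff == 0x00c */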
@@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
- pr_debug("Destroy queue failed, returned %d\n", retval);
- goto err_destroy_queue;
+ pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+ pqm->process->pasid,
+ pqn->q->properties.queue_id, retval);
+ if (retval != -ETIME)
+ goto err_destroy_queue;
}
uninit_queue(pqn->q);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index a5315d4f1c95..6dcd621e5b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q)
pr_debug("Queue Address: 0x%llX\n", q->queue_address);
pr_debug("Queue Id: %u\n", q->queue_id);
pr_debug("Queue Process Vmid: %u\n", q->vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
}
@@ -53,8 +53,8 @@ void print_queue(struct queue *q)
pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
pr_debug("Queue Id: %u\n", q->properties.queue_id);
pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ac28abc94e57..bc95d4dfee2e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->gpu->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index eb54cfcaf039..7d9c3f948dff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -45,6 +45,7 @@
#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
struct kfd_node_properties {
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
new file mode 100644
index 000000000000..0bc0b25cb410
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HSA_SOC15_INT_H_INCLUDED
+#define HSA_SOC15_INT_H_INCLUDED
+
+#include "soc15_ih_clientid.h"
+
+#define SOC15_INTSRC_CP_END_OF_PIPE 181
+#define SOC15_INTSRC_CP_BAD_OPCODE 183
+#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
+#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_SDMA_TRAP 224
+
+
+#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
+#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
+#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
+#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
+#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
+#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
+#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
+#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
+#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))
+
+#endif
+
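A hedged sketch of how an interrupt handler might use these macros to decode an IH ring entry; decode_ih_entry_example() is hypothetical and not part of this patch:

/* Illustration only: pull the common fields out of one IH ring entry. */
static void decode_ih_entry_example(const uint32_t *ih_ring_entry)
{
	uint16_t client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
	uint16_t source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
	uint16_t pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);

	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
		pr_debug("EOP interrupt: client %u, pasid %u\n",
			 client_id, pasid);
}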
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 237289a72bb7..5733fbee07f7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means Queue n is available for KFD */
DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
+ /* Doorbell assignments (SOC15 and later chips only). Only
+ * specific doorbells are routed to each SDMA engine. Others
+ * are routed to IH and VCN. They are not usable by the CP.
+ *
+ * Any doorbell number D that satisfies the following condition
+ * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
+ *
+	 * KFD currently uses 1024 doorbells per process (doorbell indices
+	 * 0x000-0x3ff). If doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are
+	 * reserved, the mask would be set to 0x1f8 and the value to 0x0f0.
+ */
+ unsigned int sdma_doorbell[2][2];
+ unsigned int reserved_doorbell_mask;
+ unsigned int reserved_doorbell_val;
+
/* Base address of doorbell aperture. */
phys_addr_t doorbell_physical_address;
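To make the mask/value rule above concrete, a hedged illustration follows; doorbell_is_reserved() is hypothetical. With the example values from the comment, mask 0x1f8 and value 0x0f0, exactly the doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 within the 0x000-0x3ff process range satisfy the condition and are skipped by KFD.

/* Illustration only: true if doorbell index d is reserved for SDMA/IH/VCN. */
static inline bool doorbell_is_reserved(unsigned int d,
				const struct kgd2kfd_shared_resources *res)
{
	return (d & res->reserved_doorbell_mask) == res->reserved_doorbell_val;
}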
@@ -173,8 +188,6 @@ struct tile_config {
* @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
* scheduling mode. Only used for no cp scheduling mode.
*
- * @init_pipeline: Initialized the compute pipelines.
- *
 * @hqd_load: Loads the mqd structure to a H/W hqd slot. Used only for no cp
 * scheduling mode.
*
@@ -274,9 +287,6 @@ struct kfd2kgd_calls {
int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
- int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-
int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -382,6 +392,10 @@ struct kfd2kgd_calls {
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
* @schedule_evict_and_restore_process: Schedules work queue that will prepare
* for safe eviction of KFD BOs that belong to the specified process.
*
@@ -399,6 +413,8 @@ struct kgd2kfd_calls {
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
+ int (*quiesce_mm)(struct mm_struct *mm);
+ int (*resume_mm)(struct mm_struct *mm);
int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
struct dma_fence *fence);
};
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index 2fb25abaf7c8..ceaf4932258d 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -29,10 +29,10 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_rb_base;
uint32_t sdmax_rlcx_rb_base_hi;
uint32_t sdmax_rlcx_rb_rptr;
+ uint32_t sdmax_rlcx_rb_rptr_hi;
uint32_t sdmax_rlcx_rb_wptr;
+ uint32_t sdmax_rlcx_rb_wptr_hi;
uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
uint32_t sdmax_rlcx_rb_rptr_addr_hi;
uint32_t sdmax_rlcx_rb_rptr_addr_lo;
uint32_t sdmax_rlcx_ib_cntl;
@@ -44,29 +44,29 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_skip_cntl;
uint32_t sdmax_rlcx_context_status;
uint32_t sdmax_rlcx_doorbell;
- uint32_t sdmax_rlcx_virtual_addr;
- uint32_t sdmax_rlcx_ape1_cntl;
+ uint32_t sdmax_rlcx_status;
uint32_t sdmax_rlcx_doorbell_log;
- uint32_t reserved_22;
- uint32_t reserved_23;
- uint32_t reserved_24;
- uint32_t reserved_25;
- uint32_t reserved_26;
- uint32_t reserved_27;
- uint32_t reserved_28;
- uint32_t reserved_29;
- uint32_t reserved_30;
- uint32_t reserved_31;
- uint32_t reserved_32;
- uint32_t reserved_33;
- uint32_t reserved_34;
- uint32_t reserved_35;
- uint32_t reserved_36;
- uint32_t reserved_37;
- uint32_t reserved_38;
- uint32_t reserved_39;
- uint32_t reserved_40;
- uint32_t reserved_41;
+ uint32_t sdmax_rlcx_watermark;
+ uint32_t sdmax_rlcx_doorbell_offset;
+ uint32_t sdmax_rlcx_csa_addr_lo;
+ uint32_t sdmax_rlcx_csa_addr_hi;
+ uint32_t sdmax_rlcx_ib_sub_remain;
+ uint32_t sdmax_rlcx_preempt;
+ uint32_t sdmax_rlcx_dummy_reg;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
+ uint32_t sdmax_rlcx_rb_aql_cntl;
+ uint32_t sdmax_rlcx_minor_ptr_update;
+ uint32_t sdmax_rlcx_midcmd_data0;
+ uint32_t sdmax_rlcx_midcmd_data1;
+ uint32_t sdmax_rlcx_midcmd_data2;
+ uint32_t sdmax_rlcx_midcmd_data3;
+ uint32_t sdmax_rlcx_midcmd_data4;
+ uint32_t sdmax_rlcx_midcmd_data5;
+ uint32_t sdmax_rlcx_midcmd_data6;
+ uint32_t sdmax_rlcx_midcmd_data7;
+ uint32_t sdmax_rlcx_midcmd_data8;
+ uint32_t sdmax_rlcx_midcmd_cntl;
uint32_t reserved_42;
uint32_t reserved_43;
uint32_t reserved_44;
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 498d5948d1a8..9837c8d69e69 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector)
}
drm_mode_connector_update_edid_property(connector, edid);
- return drm_add_edid_modes(connector, edid);
+ ret = drm_add_edid_modes(connector, edid);
+ kfree(edid);
+ return ret;
fallback:
/*
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 735ce47688f9..208bc27be3cc 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,6 +1,6 @@
config DRM_EXYNOS
tristate "DRM Support for Samsung SoC EXYNOS Series"
- depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
+ depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
select DRM_KMS_HELPER
select VIDEOMODE_HELPERS
select SND_SOC_HDMI_CODEC if SND_SOC
@@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D
help
Choose this option if you want to use Exynos G2D for DRM.
+config DRM_EXYNOS_IPP
+ bool
+
config DRM_EXYNOS_FIMC
bool "FIMC"
- depends on BROKEN && MFD_SYSCON
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos FIMC for DRM.
config DRM_EXYNOS_ROTATOR
bool "Rotator"
- depends on BROKEN
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos Rotator for DRM.
+config DRM_EXYNOS_SCALER
+ bool "Scaler"
+ select DRM_EXYNOS_IPP
+ help
+ Choose this option if you want to use Exynos Scaler for DRM.
+
config DRM_EXYNOS_GSC
bool "GScaler"
- depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
+ depends on VIDEO_SAMSUNG_EXYNOS_GSC=n
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos GSC for DRM.
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index a51c5459bb13..3b323f1e0475 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o
exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o
exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o
exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o
exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o
exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 39284bb7c2c2..a81b4a5e24a7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -27,15 +27,23 @@
#include "exynos_drm_fb.h"
#include "exynos_drm_gem.h"
#include "exynos_drm_plane.h"
+#include "exynos_drm_ipp.h"
#include "exynos_drm_vidi.h"
#include "exynos_drm_g2d.h"
#include "exynos_drm_iommu.h"
#define DRIVER_NAME "exynos"
#define DRIVER_DESC "Samsung SoC DRM"
-#define DRIVER_DATE "20110530"
+#define DRIVER_DATE "20180330"
+
+/*
+ * Interface history:
+ *
+ * 1.0 - Original version
+ * 1.1 - Upgrade IPP driver to version 2.0
+ */
#define DRIVER_MAJOR 1
-#define DRIVER_MINOR 0
+#define DRIVER_MINOR 1
static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
{
@@ -88,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = {
DRM_AUTH | DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl,
DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES,
+ exynos_drm_ipp_get_res_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS,
+ exynos_drm_ipp_get_limits_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
};
static const struct file_operations exynos_drm_driver_fops = {
@@ -184,6 +202,7 @@ struct exynos_drm_driver_info {
#define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */
#define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */
#define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */
+#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */
#define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL)
@@ -223,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D),
}, {
DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC),
+ DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE,
}, {
DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR),
+ DRM_COMPONENT_DRIVER
+ }, {
+ DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER),
+ DRM_COMPONENT_DRIVER
}, {
DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
+ DRM_COMPONENT_DRIVER
}, {
&exynos_drm_platform_driver,
DRM_VIRTUAL_DEVICE
@@ -254,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev)
&info->driver->driver,
(void *)platform_bus_type.match))) {
put_device(p);
- component_match_add(dev, &match, compare_dev, d);
+
+ if (!(info->flags & DRM_FIMC_DEVICE) ||
+ exynos_drm_check_fimc_device(d) == 0)
+ component_match_add(dev, &match,
+ compare_dev, d);
p = d;
}
put_device(p);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 075957cb6ba1..0f6d079a55c9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -273,6 +273,15 @@ static inline int exynos_dpi_bind(struct drm_device *dev,
}
#endif
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+int exynos_drm_check_fimc_device(struct device *dev);
+#else
+static inline int exynos_drm_check_fimc_device(struct device *dev)
+{
+ return 0;
+}
+#endif
+
int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
bool nonblock);
@@ -288,6 +297,7 @@ extern struct platform_driver vidi_driver;
extern struct platform_driver g2d_driver;
extern struct platform_driver fimc_driver;
extern struct platform_driver rotator_driver;
+extern struct platform_driver scaler_driver;
extern struct platform_driver gsc_driver;
extern struct platform_driver mic_driver;
#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 7904ffa9abfb..eae44fd714f0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -270,7 +270,6 @@ struct exynos_dsi {
u32 lanes;
u32 mode_flags;
u32 format;
- struct videomode vm;
int state;
struct drm_property *brightness;
@@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi)
{
- struct videomode *vm = &dsi->vm;
+ struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode;
unsigned int num_bits_resol = dsi->driver_data->num_bits_resol;
u32 reg;
if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
reg = DSIM_CMD_ALLOW(0xf)
- | DSIM_STABLE_VFP(vm->vfront_porch)
- | DSIM_MAIN_VBP(vm->vback_porch);
+ | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay)
+ | DSIM_MAIN_VBP(m->vtotal - m->vsync_end);
exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg);
- reg = DSIM_MAIN_HFP(vm->hfront_porch)
- | DSIM_MAIN_HBP(vm->hback_porch);
+ reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay)
+ | DSIM_MAIN_HBP(m->htotal - m->hsync_end);
exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg);
- reg = DSIM_MAIN_VSA(vm->vsync_len)
- | DSIM_MAIN_HSA(vm->hsync_len);
+ reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start)
+ | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start);
exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg);
}
- reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) |
- DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol);
+ reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) |
+ DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol);
exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg);
- dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive);
+ dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay);
}
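For reference, a worked example (illustrative CEA 1080p60 timings, not values taken from this patch) of how the porch and sync widths programmed above fall out of a struct drm_display_mode:

/* hdisplay=1920 hsync_start=2008 hsync_end=2052 htotal=2200
 *   HFP = 2008 - 1920 = 88, HSA = 2052 - 2008 = 44, HBP = 2200 - 2052 = 148
 * vdisplay=1080 vsync_start=1084 vsync_end=1089 vtotal=1125
 *   VFP = 1084 - 1080 = 4,  VSA = 1089 - 1084 = 5,  VBP = 1125 - 1089 = 36
 */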
static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable)
@@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
return 0;
}
-static void exynos_dsi_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct exynos_dsi *dsi = encoder_to_dsi(encoder);
- struct videomode *vm = &dsi->vm;
- struct drm_display_mode *m = adjusted_mode;
-
- vm->hactive = m->hdisplay;
- vm->vactive = m->vdisplay;
- vm->vfront_porch = m->vsync_start - m->vdisplay;
- vm->vback_porch = m->vtotal - m->vsync_end;
- vm->vsync_len = m->vsync_end - m->vsync_start;
- vm->hfront_porch = m->hsync_start - m->hdisplay;
- vm->hback_porch = m->htotal - m->hsync_end;
- vm->hsync_len = m->hsync_end - m->hsync_start;
-}
-
static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
- .mode_set = exynos_dsi_mode_set,
.enable = exynos_dsi_enable,
.disable = exynos_dsi_disable,
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 5b18b5c5fdf2..4dfbfc7f3b84 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -12,6 +12,7 @@
*
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/platform_device.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
@@ -24,8 +25,8 @@
#include <drm/exynos_drm.h>
#include "regs-fimc.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
-#include "exynos_drm_fimc.h"
/*
* FIMC stands for Fully Interactive Mobile Camera and
@@ -33,23 +34,6 @@
* input DMA reads image data from the memory.
* output DMA writes image data to memory.
* FIMC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> FIMC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> FIMC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> FIMC H/W ----> FIMD.
- */
-
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
*/
#define FIMC_MAX_DEVS 4
@@ -59,29 +43,19 @@
#define FIMC_BUF_STOP 1
#define FIMC_BUF_START 2
#define FIMC_WIDTH_ITU_709 1280
-#define FIMC_REFRESH_MAX 60
-#define FIMC_REFRESH_MIN 12
-#define FIMC_CROP_MAX 8192
-#define FIMC_CROP_MIN 32
-#define FIMC_SCALE_MAX 4224
-#define FIMC_SCALE_MIN 32
+#define FIMC_AUTOSUSPEND_DELAY 2000
+
+static unsigned int fimc_mask = 0xc;
+module_param_named(fimc_devs, fimc_mask, uint, 0644);
+MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM");
#define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct fimc_context, ippdrv);
-enum fimc_wb {
- FIMC_WB_NONE,
- FIMC_WB_A,
- FIMC_WB_B,
-};
enum {
FIMC_CLK_LCLK,
FIMC_CLK_GATE,
FIMC_CLK_WB_A,
FIMC_CLK_WB_B,
- FIMC_CLK_MUX,
- FIMC_CLK_PARENT,
FIMC_CLKS_MAX
};
@@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = {
[FIMC_CLK_GATE] = "fimc",
[FIMC_CLK_WB_A] = "pxl_async0",
[FIMC_CLK_WB_B] = "pxl_async1",
- [FIMC_CLK_MUX] = "mux",
- [FIMC_CLK_PARENT] = "parent",
};
-#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL
-
/*
* A structure of scaler.
*
@@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = {
* @vratio: vertical ratio.
*/
struct fimc_scaler {
- bool range;
+ bool range;
bool bypass;
bool up_h;
bool up_v;
@@ -116,56 +86,32 @@ struct fimc_scaler {
};
/*
- * A structure of scaler capability.
- *
- * find user manual table 43-1.
- * @in_hori: scaler input horizontal size.
- * @bypass: scaler bypass mode.
- * @dst_h_wo_rot: target horizontal size without output rotation.
- * @dst_h_rot: target horizontal size with output rotation.
- * @rl_w_wo_rot: real width without input rotation.
- * @rl_h_rot: real height without output rotation.
- */
-struct fimc_capability {
- /* scaler */
- u32 in_hori;
- u32 bypass;
- /* output rotator */
- u32 dst_h_wo_rot;
- u32 dst_h_rot;
- /* input rotator */
- u32 rl_w_wo_rot;
- u32 rl_h_rot;
-};
-
-/*
* A structure of fimc context.
*
- * @ippdrv: prepare initialization using ippdrv.
* @regs_res: register resources.
* @regs: memory mapped io registers.
* @lock: locking of operations.
* @clocks: fimc clocks.
- * @clk_frequency: LCLK clock frequency.
- * @sysreg: handle to SYSREG block regmap.
 * @sc: scaler information.
 * @pol: polarity of writeback.
* @id: fimc id.
* @irq: irq number.
- * @suspended: qos operations.
*/
struct fimc_context {
- struct exynos_drm_ippdrv ippdrv;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ struct exynos_drm_ipp_task *task;
+ struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+
struct resource *regs_res;
void __iomem *regs;
spinlock_t lock;
struct clk *clocks[FIMC_CLKS_MAX];
- u32 clk_frequency;
- struct regmap *sysreg;
struct fimc_scaler sc;
int id;
int irq;
- bool suspended;
};
static u32 fimc_read(struct fimc_context *ctx, u32 reg)
@@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx)
fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ);
}
-static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx)
-{
- return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK,
- SYSREG_FIMD0WB_DEST_MASK,
- ctx->id << SYSREG_FIMD0WB_DEST_SHIFT);
-}
-
-static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
+static void fimc_set_type_ctrl(struct fimc_context *ctx)
{
u32 cfg;
- DRM_DEBUG_KMS("wb[%d]\n", wb);
-
cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK |
EXYNOS_CIGCTRL_SELCAM_ITU_MASK |
@@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
EXYNOS_CIGCTRL_SELWB_CAMIF_MASK |
EXYNOS_CIGCTRL_SELWRITEBACK_MASK);
- switch (wb) {
- case FIMC_WB_A:
- cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A |
- EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
- break;
- case FIMC_WB_B:
- cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B |
- EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
- break;
- case FIMC_WB_NONE:
- default:
- cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
- EXYNOS_CIGCTRL_SELWRITEBACK_A |
- EXYNOS_CIGCTRL_SELCAM_MIPI_A |
- EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
- break;
- }
+ cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
+ EXYNOS_CIGCTRL_SELWRITEBACK_A |
+ EXYNOS_CIGCTRL_SELCAM_MIPI_A |
+ EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
}
@@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx)
static bool fimc_check_ovf(struct fimc_context *ctx)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 status, flag;
status = fimc_read(ctx, EXYNOS_CISTATUS);
@@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx)
EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
EXYNOS_CIWDOFST_CLROVFICR);
- dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+ dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n",
ctx->id, status);
return true;
}
@@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable)
fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
}
-
-static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_RGB888:
case DRM_FORMAT_XRGB8888:
cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
default:
/* bypass */
break;
@@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR |
EXYNOS_MSCTRL_C_INT_IN_2PLANE);
break;
- default:
- dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_MSCTRL);
-
- return 0;
}
-static int fimc_src_set_fmt(struct device *dev, u32 fmt)
+static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_NV21:
cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420;
break;
- default:
- dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_MSCTRL);
@@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt)
cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK;
- cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
+ if (tiled)
+ cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32;
+ else
+ cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
- return fimc_src_set_fmt_order(ctx, fmt);
+ fimc_src_set_fmt_order(ctx, fmt);
}
-static int fimc_src_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg1, cfg2;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+ DRM_DEBUG_KMS("rotation[%x]\n", rotation);
cfg1 = fimc_read(ctx, EXYNOS_MSCTRL);
cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR |
@@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev,
cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_90:
+ case DRM_MODE_ROTATE_90:
cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
EXYNOS_MSCTRL_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
EXYNOS_MSCTRL_FLIP_Y_MIRROR);
cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
fimc_write(ctx, cfg1, EXYNOS_MSCTRL);
fimc_write(ctx, cfg2, EXYNOS_CITRGFMT);
- *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0;
-
- return 0;
}
-static int fimc_set_window(struct fimc_context *ctx,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_set_window(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
u32 cfg, h1, h2, v1, v2;
/* cropped image */
- h1 = pos->x;
- h2 = sz->hsize - pos->w - pos->x;
- v1 = pos->y;
- v2 = sz->vsize - pos->h - pos->y;
+ h1 = buf->rect.x;
+ h2 = buf->buf.width - buf->rect.w - buf->rect.x;
+ v1 = buf->rect.y;
+ v2 = buf->buf.height - buf->rect.h - buf->rect.y;
DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n",
- pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize);
+ buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h,
+ buf->buf.width, buf->buf.height);
DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2);
/*
@@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx,
cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) |
EXYNOS_CIWDOFST2_WINVEROFST2(v2));
fimc_write(ctx, cfg, EXYNOS_CIWDOFST2);
-
- return 0;
}
-static int fimc_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_src_set_size(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
- struct drm_exynos_sz img_sz = *sz;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
- swap, sz->hsize, sz->vsize);
+ DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
/* original size */
- cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) |
- EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize));
+ cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) |
+ EXYNOS_ORGISIZE_VERTICAL(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_ORGISIZE);
- DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- img_sz.hsize = sz->vsize;
- img_sz.vsize = sz->hsize;
- }
+ DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+ buf->rect.w, buf->rect.h);
/* set input DMA image size */
cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE);
cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK |
EXYNOS_CIREAL_ISIZE_WIDTH_MASK);
- cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) |
- EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h));
+ cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) |
+ EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h));
fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE);
/*
@@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap,
* for now, we support only ITU601 8 bit mode
*/
cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
- EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) |
- EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize));
+ EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) |
+ EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_CISRCFMT);
/* offset Y(RGB), Cb, Cr */
- cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIIYOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIIYOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIIYOFF);
- cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIICBOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIICBOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIICBOFF);
- cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIICROFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIICROFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIICROFF);
- return fimc_set_window(ctx, &img_pos, &img_sz);
+ fimc_set_window(ctx, buf);
}
-static int fimc_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_src_set_addr(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > FIMC_MAX_SRC) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -ENOMEM;
- }
-
- /* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- config = &property->config[EXYNOS_DRM_OPS_SRC];
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
- EXYNOS_CIIYSA0);
-
- if (config->fmt == DRM_FORMAT_YVU420) {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIICBSA0);
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIICRSA0);
- } else {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIICBSA0);
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIICRSA0);
- }
- break;
- case IPP_BUF_DEQUEUE:
- fimc_write(ctx, 0x0, EXYNOS_CIIYSA0);
- fimc_write(ctx, 0x0, EXYNOS_CIICBSA0);
- fimc_write(ctx, 0x0, EXYNOS_CIICRSA0);
- break;
- default:
- /* bypass */
- break;
- }
-
- return 0;
+ fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0));
+ fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0));
+ fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0));
}
-static struct exynos_drm_ipp_ops fimc_src_ops = {
- .set_fmt = fimc_src_set_fmt,
- .set_transf = fimc_src_set_transf,
- .set_size = fimc_src_set_size,
- .set_addr = fimc_src_set_addr,
-};
-
-static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_RGB888:
cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_XRGB8888:
cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 |
EXYNOS_CISCCTRL_EXTRGB_EXTENSION);
@@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR;
cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE;
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
-
- return 0;
}
-static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
+static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_NV21:
cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420;
break;
- default:
- dev_err(ippdrv->dev, "invalid target format 0x%x.\n",
- fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
@@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK;
- cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
+ if (tiled)
+ cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32;
+ else
+ cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
- return fimc_dst_set_fmt_order(ctx, fmt);
+ fimc_dst_set_fmt_order(ctx, fmt);
}
-static int fimc_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+ DRM_DEBUG_KMS("rotation[0x%x]\n", rotation);
cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK;
cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_90:
+ case DRM_MODE_ROTATE_90:
cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR |
EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE |
EXYNOS_CITRGFMT_FLIP_X_MIRROR |
EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
- *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0;
-
- return 0;
}
static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
- struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg, cfg_ext, shfactor;
u32 pre_dst_width, pre_dst_height;
u32 hfactor, vfactor;
@@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
/* fimc_ippdrv_check_property assures that dividers are not null */
hfactor = fls(src_w / dst_w / 2);
if (hfactor > FIMC_SHFACTOR / 2) {
- dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+ dev_err(ctx->dev, "failed to get ratio horizontal.\n");
return -EINVAL;
}
vfactor = fls(src_h / dst_h / 2);
if (vfactor > FIMC_SHFACTOR / 2) {
- dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+ dev_err(ctx->dev, "failed to get ratio vertical.\n");
return -EINVAL;
}
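A worked example of the pre-scaler factor computation above, with illustrative sizes not taken from the patch:

/* Downscaling a 4000x3000 source to 800x600:
 *   hfactor = fls(4000 / 800 / 2) = fls(2) = 2  ->  horizontal pre-scale 1/4
 *   vfactor = fls(3000 / 600 / 2) = fls(2) = 2  ->  vertical pre-scale 1/4
 * Both factors are within FIMC_SHFACTOR / 2 (FIMC_SHFACTOR is 10 in this
 * driver, so the limit is 5) and the checks above pass.
 */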
@@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc)
fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN);
}
-static int fimc_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_dst_set_size(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
- struct drm_exynos_sz img_sz = *sz;
- u32 cfg;
+ u32 cfg, cfg_ext;
- DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
- swap, sz->hsize, sz->vsize);
+ DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
/* original size */
- cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) |
- EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize));
+ cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) |
+ EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_ORGOSIZE);
- DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
+ DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+ buf->rect.w, buf->rect.h);
/* CSC ITU */
cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
cfg &= ~EXYNOS_CIGCTRL_CSC_MASK;
- if (sz->hsize >= FIMC_WIDTH_ITU_709)
+ if (buf->buf.width >= FIMC_WIDTH_ITU_709)
cfg |= EXYNOS_CIGCTRL_CSC_ITU709;
else
cfg |= EXYNOS_CIGCTRL_CSC_ITU601;
fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- img_sz.hsize = sz->vsize;
- img_sz.vsize = sz->hsize;
- }
+ cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT);
/* target image size */
cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK |
EXYNOS_CITRGFMT_TARGETV_MASK);
- cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) |
- EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h));
+ if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE)
+ cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) |
+ EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w));
+ else
+ cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) |
+ EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h));
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
/* target area */
- cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h);
+ cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h);
fimc_write(ctx, cfg, EXYNOS_CITAREA);
/* offset Y(RGB), Cb, Cr */
- cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOYOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOYOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOYOFF);
- cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOCBOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOCBOFF);
- cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOCROFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOCROFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOCROFF);
-
- return 0;
}
static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+ bool enqueue)
{
unsigned long flags;
u32 buf_num;
u32 cfg;
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
+	DRM_DEBUG_KMS("buf_id[%d]enqueue[%d]\n", buf_id, enqueue);
spin_lock_irqsave(&ctx->lock, flags);
cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ);
- if (buf_type == IPP_BUF_ENQUEUE)
+ if (enqueue)
cfg |= (1 << buf_id);
else
cfg &= ~(1 << buf_id);
@@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
buf_num = hweight32(cfg);
- if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START)
+ if (enqueue && buf_num >= FIMC_BUF_START)
fimc_mask_irq(ctx, true);
- else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP)
+ else if (!enqueue && buf_num <= FIMC_BUF_STOP)
fimc_mask_irq(ctx, false);
spin_unlock_irqrestore(&ctx->lock, flags);
}
-static int fimc_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_dst_set_addr(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
+ fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0));
+ fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0));
+ fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0));
- if (buf_id > FIMC_MAX_DST) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -ENOMEM;
- }
-
- /* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- config = &property->config[EXYNOS_DRM_OPS_DST];
-
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
- EXYNOS_CIOYSA(buf_id));
-
- if (config->fmt == DRM_FORMAT_YVU420) {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIOCRSA(buf_id));
- } else {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIOCRSA(buf_id));
- }
- break;
- case IPP_BUF_DEQUEUE:
- fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id));
- fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
-
- fimc_dst_set_buf_seq(ctx, buf_id, buf_type);
-
- return 0;
+ fimc_dst_set_buf_seq(ctx, 0, true);
}
-static struct exynos_drm_ipp_ops fimc_dst_ops = {
- .set_fmt = fimc_dst_set_fmt,
- .set_transf = fimc_dst_set_transf,
- .set_size = fimc_dst_set_size,
- .set_addr = fimc_dst_set_addr,
-};
+static void fimc_stop(struct fimc_context *ctx);
static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
{
struct fimc_context *ctx = dev_id;
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work =
- c_node->event_work;
int buf_id;
DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id);
@@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
DRM_DEBUG_KMS("buf_id[%d]\n", buf_id);
- fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
-
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
- queue_work(ippdrv->event_workq, &event_work->work);
-
- return IRQ_HANDLED;
-}
-
-static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->writeback = 1;
- prop_list->refresh_min = FIMC_REFRESH_MIN;
- prop_list->refresh_max = FIMC_REFRESH_MAX;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) |
- (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 1;
- prop_list->crop = 1;
- prop_list->crop_max.hsize = FIMC_CROP_MAX;
- prop_list->crop_max.vsize = FIMC_CROP_MAX;
- prop_list->crop_min.hsize = FIMC_CROP_MIN;
- prop_list->crop_min.vsize = FIMC_CROP_MIN;
- prop_list->scale = 1;
- prop_list->scale_max.hsize = FIMC_SCALE_MAX;
- prop_list->scale_max.vsize = FIMC_SCALE_MAX;
- prop_list->scale_min.hsize = FIMC_SCALE_MIN;
- prop_list->scale_min.vsize = FIMC_SCALE_MIN;
-
- return 0;
-}
-
-static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
-
-static int fimc_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
-{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos *pos;
- struct drm_exynos_sz *sz;
- bool swap;
- int i;
-
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
-
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- /* check for flip */
- if (!fimc_check_drm_flip(config->flip)) {
- DRM_ERROR("invalid flip.\n");
- goto err_property;
- }
-
- /* check for degree */
- switch (config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- break;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- swap = false;
- break;
- default:
- DRM_ERROR("invalid degree.\n");
- goto err_property;
- }
-
- /* check for buffer bound */
- if ((pos->x + pos->w > sz->hsize) ||
- (pos->y + pos->h > sz->vsize)) {
- DRM_ERROR("out of buf bound.\n");
- goto err_property;
- }
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- /* check for crop */
- if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
- if (swap) {
- if ((pos->h < pp->crop_min.hsize) ||
- (sz->vsize > pp->crop_max.hsize) ||
- (pos->w < pp->crop_min.vsize) ||
- (sz->hsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->crop_min.hsize) ||
- (sz->hsize > pp->crop_max.hsize) ||
- (pos->h < pp->crop_min.vsize) ||
- (sz->vsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- }
- }
-
- /* check for scale */
- if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
- if (swap) {
- if ((pos->h < pp->scale_min.hsize) ||
- (sz->vsize > pp->scale_max.hsize) ||
- (pos->w < pp->scale_min.vsize) ||
- (sz->hsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->scale_min.hsize) ||
- (sz->hsize > pp->scale_max.hsize) ||
- (pos->h < pp->scale_min.vsize) ||
- (sz->vsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- }
- }
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, 0);
}
- return 0;
+ fimc_dst_set_buf_seq(ctx, buf_id, false);
+ fimc_stop(ctx);
-err_property:
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
-
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
- i ? "dst" : "src", config->flip, config->degree,
- pos->x, pos->y, pos->w, pos->h,
- sz->hsize, sz->vsize);
- }
-
- return -EINVAL;
+ return IRQ_HANDLED;
}
static void fimc_clear_addr(struct fimc_context *ctx)
@@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx)
}
}
-static int fimc_ippdrv_reset(struct device *dev)
+static void fimc_reset(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
-
/* reset h/w block */
fimc_sw_reset(ctx);
@@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev)
memset(&ctx->sc, 0x0, sizeof(ctx->sc));
fimc_clear_addr(ctx);
-
- return 0;
}
-static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_start(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX];
- struct drm_exynos_ipp_set_wb set_wb;
- int ret, i;
u32 cfg0, cfg1;
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
fimc_mask_irq(ctx, true);
- for_each_ipp_ops(i) {
- config = &property->config[i];
- img_pos[i] = config->pos;
- }
-
- ret = fimc_set_prescaler(ctx, &ctx->sc,
- &img_pos[EXYNOS_DRM_OPS_SRC],
- &img_pos[EXYNOS_DRM_OPS_DST]);
- if (ret) {
- dev_err(dev, "failed to set prescaler.\n");
- return ret;
- }
-
- /* If set ture, we can save jpeg about screen */
+ /* If set true, we can save a JPEG of the screen */
fimc_handle_jpeg(ctx, false);
fimc_set_scaler(ctx, &ctx->sc);
- switch (cmd) {
- case IPP_CMD_M2M:
- fimc_set_type_ctrl(ctx, FIMC_WB_NONE);
- fimc_handle_lastend(ctx, false);
-
- /* setup dma */
- cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
- cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
- cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
- fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
- break;
- case IPP_CMD_WB:
- fimc_set_type_ctrl(ctx, FIMC_WB_A);
- fimc_handle_lastend(ctx, true);
-
- /* setup FIMD */
- ret = fimc_set_camblk_fimd0_wb(ctx);
- if (ret < 0) {
- dev_err(dev, "camblk setup failed.\n");
- return ret;
- }
+ fimc_set_type_ctrl(ctx);
+ fimc_handle_lastend(ctx, false);
- set_wb.enable = 1;
- set_wb.refresh = property->refresh_rate;
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- ret = -EINVAL;
- dev_err(dev, "invalid operations.\n");
- return ret;
- }
+ /* setup dma */
+ cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
+ cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
+ cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
+ fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
/* Reset status */
fimc_write(ctx, 0x0, EXYNOS_CISTATUS);
@@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK);
- if (cmd == IPP_CMD_M2M)
- fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
-
- return 0;
+ fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
}
-static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_stop(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_ipp_set_wb set_wb = {0, 0};
u32 cfg;
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- switch (cmd) {
- case IPP_CMD_M2M:
- /* Source clear */
- cfg = fimc_read(ctx, EXYNOS_MSCTRL);
- cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
- cfg &= ~EXYNOS_MSCTRL_ENVID;
- fimc_write(ctx, cfg, EXYNOS_MSCTRL);
- break;
- case IPP_CMD_WB:
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- dev_err(dev, "invalid operations.\n");
- break;
- }
+ /* Source clear */
+ cfg = fimc_read(ctx, EXYNOS_MSCTRL);
+ cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
+ cfg &= ~EXYNOS_MSCTRL_ENVID;
+ fimc_write(ctx, cfg, EXYNOS_MSCTRL);
fimc_mask_irq(ctx, false);
@@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE);
}
+static int fimc_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct fimc_context *ctx =
+ container_of(ipp, struct fimc_context, ipp);
+
+ pm_runtime_get_sync(ctx->dev);
+ ctx->task = task;
+
+ fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier);
+ fimc_src_set_size(ctx, &task->src);
+ fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0);
+ fimc_src_set_addr(ctx, &task->src);
+ fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier);
+ fimc_dst_set_transf(ctx, task->transform.rotation);
+ fimc_dst_set_size(ctx, &task->dst);
+ fimc_dst_set_addr(ctx, &task->dst);
+ fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+ fimc_start(ctx);
+
+ return 0;
+}
+
+static void fimc_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct fimc_context *ctx =
+ container_of(ipp, struct fimc_context, ipp);
+
+ fimc_reset(ctx);
+
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
+
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, -EIO);
+ }
+}
+
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = fimc_commit,
+ .abort = fimc_abort,
+};
+
+static int fimc_bind(struct device *dev, struct device *master, void *data)
+{
+ struct fimc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ ctx->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ ctx->formats, ctx->num_formats, "fimc");
+
+ dev_info(dev, "The exynos fimc has been probed successfully\n");
+
+ return 0;
+}
+
+static void fimc_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct fimc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(drm_dev, dev);
+}
+
+static const struct component_ops fimc_component_ops = {
+ .bind = fimc_bind,
+ .unbind = fimc_unbind,
+};
+
static void fimc_put_clocks(struct fimc_context *ctx)
{
int i;
@@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx)
static int fimc_setup_clocks(struct fimc_context *ctx)
{
- struct device *fimc_dev = ctx->ippdrv.dev;
+ struct device *fimc_dev = ctx->dev;
struct device *dev;
int ret, i;
@@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx)
ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]);
if (IS_ERR(ctx->clocks[i])) {
- if (i >= FIMC_CLK_MUX)
- break;
ret = PTR_ERR(ctx->clocks[i]);
dev_err(fimc_dev, "failed to get clock: %s\n",
fimc_clock_names[i]);
@@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx)
}
}
- /* Optional FIMC LCLK parent clock setting */
- if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) {
- ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX],
- ctx->clocks[FIMC_CLK_PARENT]);
- if (ret < 0) {
- dev_err(fimc_dev, "failed to set parent.\n");
- goto e_clk_free;
- }
- }
-
- ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency);
- if (ret < 0)
- goto e_clk_free;
-
ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]);
if (!ret)
return ret;
@@ -1605,57 +1198,118 @@ e_clk_free:
return ret;
}
-static int fimc_parse_dt(struct fimc_context *ctx)
+int exynos_drm_check_fimc_device(struct device *dev)
{
- struct device_node *node = ctx->ippdrv.dev->of_node;
+ unsigned int id = of_alias_get_id(dev->of_node, "fimc");
- /* Handle only devices that support the LCD Writeback data path */
- if (!of_property_read_bool(node, "samsung,lcd-wb"))
- return -ENODEV;
+ if (id >= 0 && (BIT(id) & fimc_mask))
+ return 0;
+ return -ENODEV;
+}
- if (of_property_read_u32(node, "clock-frequency",
- &ctx->clk_frequency))
- ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY;
+static const unsigned int fimc_formats[] = {
+ DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565,
+ DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+ DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+ DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+ DRM_FORMAT_YUV444,
+};
- ctx->id = of_alias_get_id(node, "fimc");
+static const unsigned int fimc_tiled_formats[] = {
+ DRM_FORMAT_NV12, DRM_FORMAT_NV21,
+};
- if (ctx->id < 0) {
- dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n");
- return -EINVAL;
- }
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
- return 0;
-}
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
static int fimc_probe(struct platform_device *pdev)
{
+ const struct drm_exynos_ipp_limit *limits;
+ struct exynos_drm_ipp_formats *formats;
struct device *dev = &pdev->dev;
struct fimc_context *ctx;
struct resource *res;
- struct exynos_drm_ippdrv *ippdrv;
int ret;
+ int i, j, num_limits, num_formats;
- if (!dev->of_node) {
- dev_err(dev, "device tree node not found.\n");
+ if (exynos_drm_check_fimc_device(dev) != 0)
return -ENODEV;
- }
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- ctx->ippdrv.dev = dev;
+ ctx->dev = dev;
+ ctx->id = of_alias_get_id(dev->of_node, "fimc");
- ret = fimc_parse_dt(ctx);
- if (ret < 0)
- return ret;
+ /* construct formats/limits array */
+ num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats);
+ formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL);
+ if (!formats)
+ return -ENOMEM;
+
+ /* linear formats */
+ if (ctx->id < 3) {
+ limits = fimc_4210_limits_v1;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_v1);
+ } else {
+ limits = fimc_4210_limits_v2;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_v2);
+ }
+ for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) {
+ formats[i].fourcc = fimc_formats[i];
+ formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[i].limits = limits;
+ formats[i].num_limits = num_limits;
+ }
- ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "samsung,sysreg");
- if (IS_ERR(ctx->sysreg)) {
- dev_err(dev, "syscon regmap lookup failed.\n");
- return PTR_ERR(ctx->sysreg);
+ /* tiled formats */
+ if (ctx->id < 3) {
+ limits = fimc_4210_limits_tiled_v1;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1);
+ } else {
+ limits = fimc_4210_limits_tiled_v2;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2);
}
+ for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) {
+ formats[j].fourcc = fimc_tiled_formats[i];
+ formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE;
+ formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[j].limits = limits;
+ formats[j].num_limits = num_limits;
+ }
+
+ ctx->formats = formats;
+ ctx->num_formats = num_formats;
/* resource memory */
ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev)
return -ENOENT;
}
- ctx->irq = res->start;
- ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler,
- IRQF_ONESHOT, "drm_fimc", ctx);
+ ret = devm_request_irq(dev, res->start, fimc_irq_handler,
+ 0, dev_name(dev), ctx);
if (ret < 0) {
dev_err(dev, "failed to request irq.\n");
return ret;
@@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev)
if (ret < 0)
return ret;
- ippdrv = &ctx->ippdrv;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops;
- ippdrv->check_property = fimc_ippdrv_check_property;
- ippdrv->reset = fimc_ippdrv_reset;
- ippdrv->start = fimc_ippdrv_start;
- ippdrv->stop = fimc_ippdrv_stop;
- ret = fimc_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- goto err_put_clk;
- }
-
- DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
spin_lock_init(&ctx->lock);
platform_set_drvdata(pdev, ctx);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm fimc device.\n");
+ ret = component_add(dev, &fimc_component_ops);
+ if (ret)
goto err_pm_dis;
- }
dev_info(dev, "drm fimc registered successfully.\n");
return 0;
err_pm_dis:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
-err_put_clk:
fimc_put_clocks(ctx);
return ret;
@@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- exynos_drm_ippdrv_unregister(ippdrv);
+ component_del(dev, &fimc_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
fimc_put_clocks(ctx);
- pm_runtime_set_suspended(dev);
- pm_runtime_disable(dev);
return 0;
}
#ifdef CONFIG_PM
-static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable)
-{
- DRM_DEBUG_KMS("enable[%d]\n", enable);
-
- if (enable) {
- clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
- clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]);
- ctx->suspended = false;
- } else {
- clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
- clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]);
- ctx->suspended = true;
- }
-
- return 0;
-}
-
static int fimc_runtime_suspend(struct device *dev)
{
struct fimc_context *ctx = get_fimc_context(dev);
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- return fimc_clk_ctrl(ctx, false);
+ clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
+ return 0;
}
static int fimc_runtime_resume(struct device *dev)
@@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev)
struct fimc_context *ctx = get_fimc_context(dev);
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- return fimc_clk_ctrl(ctx, true);
+ return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
}
#endif
@@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = {
.pm = &fimc_pm_ops,
},
};
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h
deleted file mode 100644
index 127a424c5fdf..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * Eunchul Kim <chulspro.kim@samsung.com>
- * Jinyoung Jeon <jy0.jeon@samsung.com>
- * Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_FIMC_H_
-#define _EXYNOS_DRM_FIMC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_FIMC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index d42ae2bc3e56..01b1570d0c3a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
.has_limited_fmt = 1,
};
+static struct fimd_driver_data s5pv210_fimd_driver_data = {
+ .timing_base = 0x0,
+ .has_shadowcon = 1,
+ .has_clksel = 1,
+};
+
static struct fimd_driver_data exynos3_fimd_driver_data = {
.timing_base = 0x20000,
.lcdblk_offset = 0x210,
@@ -193,6 +199,8 @@ struct fimd_context {
static const struct of_device_id fimd_driver_dt_match[] = {
{ .compatible = "samsung,s3c6400-fimd",
.data = &s3c64xx_fimd_driver_data },
+ { .compatible = "samsung,s5pv210-fimd",
+ .data = &s5pv210_fimd_driver_data },
{ .compatible = "samsung,exynos3250-fimd",
.data = &exynos3_fimd_driver_data },
{ .compatible = "samsung,exynos4210-fimd",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 11cc01b47bc0..6e1494fa71b4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
return 0;
}
-int exynos_drm_gem_fault(struct vm_fault *vmf)
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
unsigned long pfn;
pgoff_t page_offset;
- int ret;
page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
DRM_ERROR("invalid page offset\n");
- ret = -EINVAL;
- goto out;
+ return VM_FAULT_SIGBUS;
}
pfn = page_to_pfn(exynos_gem->pages[page_offset]);
- ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-out:
- switch (ret) {
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- return VM_FAULT_NOPAGE;
- case -ENOMEM:
- return VM_FAULT_OOM;
- default:
- return VM_FAULT_SIGBUS;
- }
+ return vmf_insert_mixed(vma, vmf->address,
+ __pfn_to_pfn_t(pfn, PFN_DEV));
}
static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 5a4c7de80f65..9057d7f1d6ed 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -13,6 +13,7 @@
#define _EXYNOS_DRM_GEM_H_
#include <drm/drm_gem.h>
+#include <linux/mm_types.h>
#define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base)
@@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args);
/* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_fault *vmf);
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf);
/* set vm_flags and we can change the vm attribute to other one at here. */
int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 0506b2b17ac1..e99dd1e4ba65 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -12,18 +12,20 @@
*
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/pm_runtime.h>
#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
#include <linux/regmap.h>
#include <drm/drmP.h>
#include <drm/exynos_drm.h>
#include "regs-gsc.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
-#include "exynos_drm_gsc.h"
/*
* GSC stands for General SCaler and
@@ -31,26 +33,10 @@
* input DMA reads image data from the memory.
* output DMA writes image data to memory.
* GSC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> GSC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> GSC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> GSC H/W ----> FIMD, Mixer.
*/
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
- */
-#define GSC_MAX_DEVS 4
+#define GSC_MAX_CLOCKS 8
#define GSC_MAX_SRC 4
#define GSC_MAX_DST 16
#define GSC_RESET_TIMEOUT 50
@@ -65,8 +51,6 @@
#define GSC_SC_DOWN_RATIO_4_8 131072
#define GSC_SC_DOWN_RATIO_3_8 174762
#define GSC_SC_DOWN_RATIO_2_8 262144
-#define GSC_REFRESH_MIN 12
-#define GSC_REFRESH_MAX 60
#define GSC_CROP_MAX 8192
#define GSC_CROP_MIN 32
#define GSC_SCALE_MAX 4224
@@ -77,10 +61,9 @@
#define GSC_COEF_H_8T 8
#define GSC_COEF_V_4T 4
#define GSC_COEF_DEPTH 3
+#define GSC_AUTOSUSPEND_DELAY 2000
#define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct gsc_context, ippdrv);
#define gsc_read(offset) readl(ctx->regs + (offset))
#define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset))
@@ -104,50 +87,47 @@ struct gsc_scaler {
};
/*
- * A structure of scaler capability.
- *
- * find user manual 49.2 features.
- * @tile_w: tile mode or rotation width.
- * @tile_h: tile mode or rotation height.
- * @w: other cases width.
- * @h: other cases height.
- */
-struct gsc_capability {
- /* tile or rotation */
- u32 tile_w;
- u32 tile_h;
- /* other cases */
- u32 w;
- u32 h;
-};
-
-/*
* A structure of gsc context.
*
- * @ippdrv: prepare initialization using ippdrv.
* @regs_res: register resources.
* @regs: memory mapped io registers.
- * @sysreg: handle to SYSREG block regmap.
- * @lock: locking of operations.
* @gsc_clk: gsc gate clock.
* @sc: scaler infomations.
* @id: gsc id.
* @irq: irq number.
* @rotation: supports rotation of src.
- * @suspended: qos operations.
*/
struct gsc_context {
- struct exynos_drm_ippdrv ippdrv;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ struct exynos_drm_ipp_task *task;
+ struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+
struct resource *regs_res;
void __iomem *regs;
- struct regmap *sysreg;
- struct mutex lock;
- struct clk *gsc_clk;
+ const char **clk_names;
+ struct clk *clocks[GSC_MAX_CLOCKS];
+ int num_clocks;
struct gsc_scaler sc;
int id;
int irq;
bool rotation;
- bool suspended;
+};
+
+/**
+ * struct gsc_driverdata - per device type driver data for init time.
+ *
+ * @limits: picture size limits array
+ * @clk_names: names of clocks needed by this variant
+ * @num_clocks: the number of clocks needed by this variant
+ */
+struct gsc_driverdata {
+ const struct drm_exynos_ipp_limit *limits;
+ int num_limits;
+ const char *clk_names[GSC_MAX_CLOCKS];
+ int num_clocks;
};
/* 8-tap Filter Coefficient */
@@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx)
return 0;
}
-static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable)
-{
- unsigned int gscblk_cfg;
-
- if (!ctx->sysreg)
- return;
-
- regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg);
-
- if (enable)
- gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) |
- GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) |
- GSC_BLK_SW_RESET_WB_DEST(ctx->id);
- else
- gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id);
-
- regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg);
-}
-
static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
bool overflow, bool done)
{
@@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
}
-static int gsc_src_set_fmt(struct device *dev, u32 fmt)
+static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt)
cfg |= GSC_IN_RGB565;
break;
case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
cfg |= GSC_IN_XRGB8888;
break;
case DRM_FORMAT_BGRX8888:
@@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt)
cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
GSC_IN_YUV420_2P);
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
gsc_write(cfg, GSC_IN_CON);
-
- return 0;
}
-static int gsc_src_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
cfg = gsc_read(GSC_IN_CON);
cfg &= ~GSC_IN_ROT_MASK;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_90:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_90_XFLIP;
- else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_90_YFLIP;
- else
- cfg |= GSC_IN_ROT_90;
+ case DRM_MODE_ROTATE_90:
+ cfg |= GSC_IN_ROT_90;
+ if (rotation & DRM_MODE_REFLECT_Y)
+ cfg |= GSC_IN_ROT_XFLIP;
+ if (rotation & DRM_MODE_REFLECT_X)
+ cfg |= GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg |= GSC_IN_ROT_180;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg |= GSC_IN_ROT_270;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~GSC_IN_ROT_YFLIP;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
gsc_write(cfg, GSC_IN_CON);
ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
- *swap = ctx->rotation;
-
- return 0;
}
-static int gsc_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_src_set_size(struct gsc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
struct gsc_scaler *sc = &ctx->sc;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
- swap, pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- }
-
/* pixel offset */
- cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) |
- GSC_SRCIMG_OFFSET_Y(img_pos.y));
+ cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) |
+ GSC_SRCIMG_OFFSET_Y(buf->rect.y));
gsc_write(cfg, GSC_SRCIMG_OFFSET);
/* cropped size */
- cfg = (GSC_CROPPED_WIDTH(img_pos.w) |
- GSC_CROPPED_HEIGHT(img_pos.h));
+ cfg = (GSC_CROPPED_WIDTH(buf->rect.w) |
+ GSC_CROPPED_HEIGHT(buf->rect.h));
gsc_write(cfg, GSC_CROPPED_SIZE);
- DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
/* original size */
cfg = gsc_read(GSC_SRCIMG_SIZE);
cfg &= ~(GSC_SRCIMG_HEIGHT_MASK |
GSC_SRCIMG_WIDTH_MASK);
- cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) |
- GSC_SRCIMG_HEIGHT(sz->vsize));
+ cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) |
+ GSC_SRCIMG_HEIGHT(buf->buf.height));
gsc_write(cfg, GSC_SRCIMG_SIZE);
cfg = gsc_read(GSC_IN_CON);
cfg &= ~GSC_IN_RGB_TYPE_MASK;
- DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
- if (pos->w >= GSC_WIDTH_ITU_709)
+ if (buf->rect.w >= GSC_WIDTH_ITU_709)
if (sc->range)
cfg |= GSC_IN_RGB_HD_WIDE;
else
@@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap,
cfg |= GSC_IN_RGB_SD_NARROW;
gsc_write(cfg, GSC_IN_CON);
-
- return 0;
}
-static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+ bool enqueue)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- bool masked;
+ bool masked = !enqueue;
u32 cfg;
u32 mask = 0x00000001 << buf_id;
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
/* mask register set */
cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- masked = false;
- break;
- case IPP_BUF_DEQUEUE:
- masked = true;
- break;
- default:
- dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
- return -EINVAL;
- }
-
/* sequence id */
cfg &= ~mask;
cfg |= masked << buf_id;
gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK);
gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK);
gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK);
-
- return 0;
}
-static int gsc_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EFAULT;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > GSC_MAX_SRC) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -EINVAL;
- }
-
/* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
- GSC_IN_BASE_ADDR_Y(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
- GSC_IN_BASE_ADDR_CB(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
- GSC_IN_BASE_ADDR_CR(buf_id));
- break;
- case IPP_BUF_DEQUEUE:
- gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id));
- gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id));
- gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
+ gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id));
+ gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id));
+ gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id));
- return gsc_src_set_buf_seq(ctx, buf_id, buf_type);
+ gsc_src_set_buf_seq(ctx, buf_id, true);
}
-static struct exynos_drm_ipp_ops gsc_src_ops = {
- .set_fmt = gsc_src_set_fmt,
- .set_transf = gsc_src_set_transf,
- .set_size = gsc_src_set_size,
- .set_addr = gsc_src_set_addr,
-};
-
-static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
+static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= GSC_OUT_RGB565;
break;
+ case DRM_FORMAT_ARGB8888:
case DRM_FORMAT_XRGB8888:
- cfg |= GSC_OUT_XRGB8888;
+ cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff));
break;
case DRM_FORMAT_BGRX8888:
cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP);
@@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
GSC_OUT_YUV420_2P);
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
gsc_write(cfg, GSC_OUT_CON);
-
- return 0;
-}
-
-static int gsc_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- u32 cfg;
-
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~GSC_IN_ROT_MASK;
-
- switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_YFLIP;
- break;
- case EXYNOS_DRM_DEGREE_90:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_90_XFLIP;
- else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_90_YFLIP;
- else
- cfg |= GSC_IN_ROT_90;
- break;
- case EXYNOS_DRM_DEGREE_180:
- cfg |= GSC_IN_ROT_180;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg &= ~GSC_IN_ROT_YFLIP;
- break;
- case EXYNOS_DRM_DEGREE_270:
- cfg |= GSC_IN_ROT_270;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg &= ~GSC_IN_ROT_YFLIP;
- break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
- }
-
- gsc_write(cfg, GSC_IN_CON);
-
- ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
- *swap = ctx->rotation;
-
- return 0;
}
static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio)
@@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor)
}
static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
- struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
u32 src_w, src_h, dst_w, dst_h;
int ret = 0;
@@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio);
if (ret) {
- dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+ dev_err(ctx->dev, "failed to get ratio horizontal.\n");
return ret;
}
ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio);
if (ret) {
- dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+ dev_err(ctx->dev, "failed to get ratio vertical.\n");
return ret;
}
@@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc)
gsc_write(cfg, GSC_MAIN_V_RATIO);
}
-static int gsc_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_dst_set_size(struct gsc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
struct gsc_scaler *sc = &ctx->sc;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
- swap, pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- }
-
/* pixel offset */
- cfg = (GSC_DSTIMG_OFFSET_X(pos->x) |
- GSC_DSTIMG_OFFSET_Y(pos->y));
+ cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) |
+ GSC_DSTIMG_OFFSET_Y(buf->rect.y));
gsc_write(cfg, GSC_DSTIMG_OFFSET);
/* scaled size */
- cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h));
+ if (ctx->rotation)
+ cfg = (GSC_SCALED_WIDTH(buf->rect.h) |
+ GSC_SCALED_HEIGHT(buf->rect.w));
+ else
+ cfg = (GSC_SCALED_WIDTH(buf->rect.w) |
+ GSC_SCALED_HEIGHT(buf->rect.h));
gsc_write(cfg, GSC_SCALED_SIZE);
- DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
/* original size */
cfg = gsc_read(GSC_DSTIMG_SIZE);
- cfg &= ~(GSC_DSTIMG_HEIGHT_MASK |
- GSC_DSTIMG_WIDTH_MASK);
- cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) |
- GSC_DSTIMG_HEIGHT(sz->vsize));
+ cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK);
+ cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) |
+ GSC_DSTIMG_HEIGHT(buf->buf.height);
gsc_write(cfg, GSC_DSTIMG_SIZE);
cfg = gsc_read(GSC_OUT_CON);
cfg &= ~GSC_OUT_RGB_TYPE_MASK;
- DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
- if (pos->w >= GSC_WIDTH_ITU_709)
+ if (buf->rect.w >= GSC_WIDTH_ITU_709)
if (sc->range)
cfg |= GSC_OUT_RGB_HD_WIDE;
else
@@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap,
cfg |= GSC_OUT_RGB_SD_NARROW;
gsc_write(cfg, GSC_OUT_CON);
-
- return 0;
}
static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
@@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
return buf_num;
}
-static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+ bool enqueue)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- bool masked;
+ bool masked = !enqueue;
u32 cfg;
u32 mask = 0x00000001 << buf_id;
- int ret = 0;
-
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
- mutex_lock(&ctx->lock);
/* mask register set */
cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- masked = false;
- break;
- case IPP_BUF_DEQUEUE:
- masked = true;
- break;
- default:
- dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
- ret = -EINVAL;
- goto err_unlock;
- }
-
/* sequence id */
cfg &= ~mask;
cfg |= masked << buf_id;
@@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK);
/* interrupt enable */
- if (buf_type == IPP_BUF_ENQUEUE &&
- gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
+ if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
gsc_handle_irq(ctx, true, false, true);
/* interrupt disable */
- if (buf_type == IPP_BUF_DEQUEUE &&
- gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
+ if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
gsc_handle_irq(ctx, false, false, true);
-
-err_unlock:
- mutex_unlock(&ctx->lock);
- return ret;
}
-static int gsc_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_addr(struct gsc_context *ctx,
+ u32 buf_id, struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EFAULT;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > GSC_MAX_DST) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -EINVAL;
- }
-
/* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
- GSC_OUT_BASE_ADDR_Y(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
- GSC_OUT_BASE_ADDR_CB(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
- GSC_OUT_BASE_ADDR_CR(buf_id));
- break;
- case IPP_BUF_DEQUEUE:
- gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id));
- gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id));
- gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
+ gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id));
+ gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id));
+ gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id));
- return gsc_dst_set_buf_seq(ctx, buf_id, buf_type);
-}
-
-static struct exynos_drm_ipp_ops gsc_dst_ops = {
- .set_fmt = gsc_dst_set_fmt,
- .set_transf = gsc_dst_set_transf,
- .set_size = gsc_dst_set_size,
- .set_addr = gsc_dst_set_addr,
-};
-
-static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable)
-{
- DRM_DEBUG_KMS("enable[%d]\n", enable);
-
- if (enable) {
- clk_prepare_enable(ctx->gsc_clk);
- ctx->suspended = false;
- } else {
- clk_disable_unprepare(ctx->gsc_clk);
- ctx->suspended = true;
- }
-
- return 0;
+ gsc_dst_set_buf_seq(ctx, buf_id, true);
}
static int gsc_get_src_buf_index(struct gsc_context *ctx)
{
u32 cfg, curr_index, i;
u32 buf_id = GSC_MAX_SRC;
- int ret;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
@@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx)
}
}
+ DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
+ curr_index, buf_id);
+
if (buf_id == GSC_MAX_SRC) {
DRM_ERROR("failed to get in buffer index.\n");
return -EINVAL;
}
- ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
- if (ret < 0) {
- DRM_ERROR("failed to dequeue.\n");
- return ret;
- }
-
- DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
- curr_index, buf_id);
+ gsc_src_set_buf_seq(ctx, buf_id, false);
return buf_id;
}
@@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
{
u32 cfg, curr_index, i;
u32 buf_id = GSC_MAX_DST;
- int ret;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
@@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
return -EINVAL;
}
- ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
- if (ret < 0) {
- DRM_ERROR("failed to dequeue.\n");
- return ret;
- }
+ gsc_dst_set_buf_seq(ctx, buf_id, false);
DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
curr_index, buf_id);
@@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
static irqreturn_t gsc_irq_handler(int irq, void *dev_id)
{
struct gsc_context *ctx = dev_id;
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work =
- c_node->event_work;
u32 status;
- int buf_id[EXYNOS_DRM_OPS_MAX];
+ int err = 0;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
status = gsc_read(GSC_IRQ);
if (status & GSC_IRQ_STATUS_OR_IRQ) {
- dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+ dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n",
ctx->id, status);
- return IRQ_NONE;
+ err = -EINVAL;
}
if (status & GSC_IRQ_STATUS_OR_FRM_DONE) {
- dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n",
- ctx->id, status);
-
- buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx);
- if (buf_id[EXYNOS_DRM_OPS_SRC] < 0)
- return IRQ_HANDLED;
-
- buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx);
- if (buf_id[EXYNOS_DRM_OPS_DST] < 0)
- return IRQ_HANDLED;
-
- DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n",
- buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]);
-
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_SRC] =
- buf_id[EXYNOS_DRM_OPS_SRC];
- event_work->buf_id[EXYNOS_DRM_OPS_DST] =
- buf_id[EXYNOS_DRM_OPS_DST];
- queue_work(ippdrv->event_workq, &event_work->work);
- }
-
- return IRQ_HANDLED;
-}
-
-static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->writeback = 1;
- prop_list->refresh_min = GSC_REFRESH_MIN;
- prop_list->refresh_max = GSC_REFRESH_MAX;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 1;
- prop_list->crop = 1;
- prop_list->crop_max.hsize = GSC_CROP_MAX;
- prop_list->crop_max.vsize = GSC_CROP_MAX;
- prop_list->crop_min.hsize = GSC_CROP_MIN;
- prop_list->crop_min.vsize = GSC_CROP_MIN;
- prop_list->scale = 1;
- prop_list->scale_max.hsize = GSC_SCALE_MAX;
- prop_list->scale_max.vsize = GSC_SCALE_MAX;
- prop_list->scale_min.hsize = GSC_SCALE_MIN;
- prop_list->scale_min.vsize = GSC_SCALE_MIN;
-
- return 0;
-}
-
-static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
-
-static int gsc_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos *pos;
- struct drm_exynos_sz *sz;
- bool swap;
- int i;
-
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
+ int src_buf_id, dst_buf_id;
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- /* check for flip */
- if (!gsc_check_drm_flip(config->flip)) {
- DRM_ERROR("invalid flip.\n");
- goto err_property;
- }
-
- /* check for degree */
- switch (config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- break;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- swap = false;
- break;
- default:
- DRM_ERROR("invalid degree.\n");
- goto err_property;
- }
+ dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n",
+ ctx->id, status);
- /* check for buffer bound */
- if ((pos->x + pos->w > sz->hsize) ||
- (pos->y + pos->h > sz->vsize)) {
- DRM_ERROR("out of buf bound.\n");
- goto err_property;
- }
+ src_buf_id = gsc_get_src_buf_index(ctx);
+ dst_buf_id = gsc_get_dst_buf_index(ctx);
- /* check for crop */
- if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
- if (swap) {
- if ((pos->h < pp->crop_min.hsize) ||
- (sz->vsize > pp->crop_max.hsize) ||
- (pos->w < pp->crop_min.vsize) ||
- (sz->hsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->crop_min.hsize) ||
- (sz->hsize > pp->crop_max.hsize) ||
- (pos->h < pp->crop_min.vsize) ||
- (sz->vsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- }
- }
+ DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id,
+ dst_buf_id);
- /* check for scale */
- if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
- if (swap) {
- if ((pos->h < pp->scale_min.hsize) ||
- (sz->vsize > pp->scale_max.hsize) ||
- (pos->w < pp->scale_min.vsize) ||
- (sz->hsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->scale_min.hsize) ||
- (sz->hsize > pp->scale_max.hsize) ||
- (pos->h < pp->scale_min.vsize) ||
- (sz->vsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- }
- }
+ if (src_buf_id < 0 || dst_buf_id < 0)
+ err = -EINVAL;
}
- return 0;
-
-err_property:
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
- i ? "dst" : "src", config->flip, config->degree,
- pos->x, pos->y, pos->w, pos->h,
- sz->hsize, sz->vsize);
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, err);
}
- return -EINVAL;
+ return IRQ_HANDLED;
}
-
-static int gsc_ippdrv_reset(struct device *dev)
+static int gsc_reset(struct gsc_context *ctx)
{
- struct gsc_context *ctx = get_gsc_context(dev);
struct gsc_scaler *sc = &ctx->sc;
int ret;
/* reset h/w block */
ret = gsc_sw_reset(ctx);
if (ret < 0) {
- dev_err(dev, "failed to reset hardware.\n");
+ dev_err(ctx->dev, "failed to reset hardware.\n");
return ret;
}
@@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev)
return 0;
}
-static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_start(struct gsc_context *ctx)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX];
- struct drm_exynos_ipp_set_wb set_wb;
u32 cfg;
- int ret, i;
-
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
gsc_handle_irq(ctx, true, false, true);
- for_each_ipp_ops(i) {
- config = &property->config[i];
- img_pos[i] = config->pos;
- }
+ /* enable one shot */
+ cfg = gsc_read(GSC_ENABLE);
+ cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
+ GSC_ENABLE_CLK_GATE_MODE_MASK);
+ cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
+ gsc_write(cfg, GSC_ENABLE);
- switch (cmd) {
- case IPP_CMD_M2M:
- /* enable one shot */
- cfg = gsc_read(GSC_ENABLE);
- cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
- GSC_ENABLE_CLK_GATE_MODE_MASK);
- cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
- gsc_write(cfg, GSC_ENABLE);
-
- /* src dma memory */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= GSC_IN_PATH_MEMORY;
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst dma memory */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- case IPP_CMD_WB:
- set_wb.enable = 1;
- set_wb.refresh = property->refresh_rate;
- gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-
- /* src local path */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB);
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst dma memory */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- case IPP_CMD_OUTPUT:
- /* src dma memory */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= GSC_IN_PATH_MEMORY;
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst local path */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- default:
- ret = -EINVAL;
- dev_err(dev, "invalid operations.\n");
- return ret;
- }
+ /* src dma memory */
+ cfg = gsc_read(GSC_IN_CON);
+ cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+ cfg |= GSC_IN_PATH_MEMORY;
+ gsc_write(cfg, GSC_IN_CON);
- ret = gsc_set_prescaler(ctx, &ctx->sc,
- &img_pos[EXYNOS_DRM_OPS_SRC],
- &img_pos[EXYNOS_DRM_OPS_DST]);
- if (ret) {
- dev_err(dev, "failed to set prescaler.\n");
- return ret;
- }
+ /* dst dma memory */
+ cfg = gsc_read(GSC_OUT_CON);
+ cfg |= GSC_OUT_PATH_MEMORY;
+ gsc_write(cfg, GSC_OUT_CON);
gsc_set_scaler(ctx, &ctx->sc);
cfg = gsc_read(GSC_ENABLE);
cfg |= GSC_ENABLE_ON;
gsc_write(cfg, GSC_ENABLE);
+}
+
+static int gsc_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp);
+ int ret;
+
+ pm_runtime_get_sync(ctx->dev);
+ ctx->task = task;
+
+ ret = gsc_reset(ctx);
+ if (ret) {
+ pm_runtime_put_autosuspend(ctx->dev);
+ ctx->task = NULL;
+ return ret;
+ }
+
+ gsc_src_set_fmt(ctx, task->src.buf.fourcc);
+ gsc_src_set_transf(ctx, task->transform.rotation);
+ gsc_src_set_size(ctx, &task->src);
+ gsc_src_set_addr(ctx, 0, &task->src);
+ gsc_dst_set_fmt(ctx, task->dst.buf.fourcc);
+ gsc_dst_set_size(ctx, &task->dst);
+ gsc_dst_set_addr(ctx, 0, &task->dst);
+ gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+ gsc_start(ctx);
return 0;
}
-static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_ipp_set_wb set_wb = {0, 0};
- u32 cfg;
+ struct gsc_context *ctx =
+ container_of(ipp, struct gsc_context, ipp);
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
+ gsc_reset(ctx);
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- switch (cmd) {
- case IPP_CMD_M2M:
- /* bypass */
- break;
- case IPP_CMD_WB:
- gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- dev_err(dev, "invalid operations.\n");
- break;
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, -EIO);
}
+}
- gsc_handle_irq(ctx, false, false, true);
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = gsc_commit,
+ .abort = gsc_abort,
+};
- /* reset sequence */
- gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK);
- gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK);
- gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK);
+static int gsc_bind(struct device *dev, struct device *master, void *data)
+{
+ struct gsc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
- cfg = gsc_read(GSC_ENABLE);
- cfg &= ~GSC_ENABLE_ON;
- gsc_write(cfg, GSC_ENABLE);
+ ctx->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ ctx->formats, ctx->num_formats, "gsc");
+
+ dev_info(dev, "The exynos gscaler has been probed successfully\n");
+
+ return 0;
+}
+
+static void gsc_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct gsc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(drm_dev, dev);
}
+static const struct component_ops gsc_component_ops = {
+ .bind = gsc_bind,
+ .unbind = gsc_unbind,
+};
+
+static const unsigned int gsc_formats[] = {
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888,
+ DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+ DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+ DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+};
+
static int gsc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct gsc_driverdata *driver_data;
+ struct exynos_drm_ipp_formats *formats;
struct gsc_context *ctx;
struct resource *res;
- struct exynos_drm_ippdrv *ippdrv;
- int ret;
+ int ret, i;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- if (dev->of_node) {
- ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "samsung,sysreg");
- if (IS_ERR(ctx->sysreg)) {
- dev_warn(dev, "failed to get system register.\n");
- ctx->sysreg = NULL;
- }
+ formats = devm_kzalloc(dev, sizeof(*formats) *
+ (ARRAY_SIZE(gsc_formats)), GFP_KERNEL);
+ if (!formats)
+ return -ENOMEM;
+
+ driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev);
+ ctx->dev = dev;
+ ctx->num_clocks = driver_data->num_clocks;
+ ctx->clk_names = driver_data->clk_names;
+
+ for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) {
+ formats[i].fourcc = gsc_formats[i];
+ formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[i].limits = driver_data->limits;
+ formats[i].num_limits = driver_data->num_limits;
}
+ ctx->formats = formats;
+ ctx->num_formats = ARRAY_SIZE(gsc_formats);
/* clock control */
- ctx->gsc_clk = devm_clk_get(dev, "gscl");
- if (IS_ERR(ctx->gsc_clk)) {
- dev_err(dev, "failed to get gsc clock.\n");
- return PTR_ERR(ctx->gsc_clk);
+ for (i = 0; i < ctx->num_clocks; i++) {
+ ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]);
+ if (IS_ERR(ctx->clocks[i])) {
+ dev_err(dev, "failed to get clock: %s\n",
+ ctx->clk_names[i]);
+ return PTR_ERR(ctx->clocks[i]);
+ }
}
/* resource memory */
@@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev)
}
ctx->irq = res->start;
- ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler,
- IRQF_ONESHOT, "drm_gsc", ctx);
+ ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0,
+ dev_name(dev), ctx);
if (ret < 0) {
dev_err(dev, "failed to request irq.\n");
return ret;
@@ -1709,38 +1256,22 @@ static int gsc_probe(struct platform_device *pdev)
 	/* context initialization */
ctx->id = pdev->id;
- ippdrv = &ctx->ippdrv;
- ippdrv->dev = dev;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops;
- ippdrv->check_property = gsc_ippdrv_check_property;
- ippdrv->reset = gsc_ippdrv_reset;
- ippdrv->start = gsc_ippdrv_start;
- ippdrv->stop = gsc_ippdrv_stop;
- ret = gsc_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- return ret;
- }
-
- DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
- mutex_init(&ctx->lock);
platform_set_drvdata(pdev, ctx);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm gsc device.\n");
- goto err_ippdrv_register;
- }
+ ret = component_add(dev, &gsc_component_ops);
+ if (ret)
+ goto err_pm_dis;
dev_info(dev, "drm gsc registered successfully.\n");
return 0;
-err_ippdrv_register:
+err_pm_dis:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return ret;
}
@@ -1748,13 +1279,8 @@ err_ippdrv_register:
static int gsc_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- exynos_drm_ippdrv_unregister(ippdrv);
- mutex_destroy(&ctx->lock);
-
- pm_runtime_set_suspended(dev);
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return 0;
@@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev)
static int __maybe_unused gsc_runtime_suspend(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
+ int i;
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
- return gsc_clk_ctrl(ctx, false);
+ for (i = ctx->num_clocks - 1; i >= 0; i--)
+ clk_disable_unprepare(ctx->clocks[i]);
+
+ return 0;
}
static int __maybe_unused gsc_runtime_resume(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
+ int i, ret;
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
- return gsc_clk_ctrl(ctx, true);
+ for (i = 0; i < ctx->num_clocks; i++) {
+ ret = clk_prepare_enable(ctx->clocks[i]);
+ if (ret) {
+			while (--i >= 0)
+ clk_disable_unprepare(ctx->clocks[i]);
+ return ret;
+ }
+ }
+ return 0;
}
static const struct dev_pm_ops gsc_pm_ops = {
@@ -1784,9 +1323,66 @@ static const struct dev_pm_ops gsc_pm_ops = {
SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
};
+static const struct drm_exynos_ipp_limit gsc_5250_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5420_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5433_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static struct gsc_driverdata gsc_exynos5250_drvdata = {
+ .clk_names = {"gscl"},
+ .num_clocks = 1,
+ .limits = gsc_5250_limits,
+ .num_limits = ARRAY_SIZE(gsc_5250_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5420_drvdata = {
+ .clk_names = {"gscl"},
+ .num_clocks = 1,
+ .limits = gsc_5420_limits,
+ .num_limits = ARRAY_SIZE(gsc_5420_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5433_drvdata = {
+ .clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"},
+ .num_clocks = 4,
+ .limits = gsc_5433_limits,
+ .num_limits = ARRAY_SIZE(gsc_5433_limits),
+};
+
static const struct of_device_id exynos_drm_gsc_of_match[] = {
- { .compatible = "samsung,exynos5-gsc" },
- { },
+ {
+ .compatible = "samsung,exynos5-gsc",
+ .data = &gsc_exynos5250_drvdata,
+ }, {
+ .compatible = "samsung,exynos5250-gsc",
+ .data = &gsc_exynos5250_drvdata,
+ }, {
+ .compatible = "samsung,exynos5420-gsc",
+ .data = &gsc_exynos5420_drvdata,
+ }, {
+ .compatible = "samsung,exynos5433-gsc",
+ .data = &gsc_exynos5433_drvdata,
+ }, {
+ },
};
MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
@@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = {
.of_match_table = of_match_ptr(exynos_drm_gsc_of_match),
},
};
-
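
The GSC conversion above shows the pattern drivers ported to the new IPP core follow: the commit callback takes a runtime-PM reference, programs the hardware and returns without waiting, and the interrupt handler later drops the reference and signals completion through exynos_drm_ipp_task_done(). A minimal sketch of that handoff, with hypothetical foo_* names and the actual register programming elided:

#include <linux/interrupt.h>
#include <linux/pm_runtime.h>

#include "exynos_drm_ipp.h"

struct foo_context {
	struct device *dev;
	struct exynos_drm_ipp ipp;
	struct exynos_drm_ipp_task *task;	/* task currently on the hardware */
};

static int foo_commit(struct exynos_drm_ipp *ipp,
		      struct exynos_drm_ipp_task *task)
{
	struct foo_context *ctx = container_of(ipp, struct foo_context, ipp);

	/* power up the block and remember the task for the IRQ handler */
	pm_runtime_get_sync(ctx->dev);
	ctx->task = task;
	/* program src/dst buffers from task->src and task->dst, then start */
	return 0;
}

static irqreturn_t foo_irq_handler(int irq, void *arg)
{
	struct foo_context *ctx = arg;
	struct exynos_drm_ipp_task *task = ctx->task;

	if (task) {
		ctx->task = NULL;
		pm_runtime_mark_last_busy(ctx->dev);
		pm_runtime_put_autosuspend(ctx->dev);
		/* report 0 on success or a negative errno on failure */
		exynos_drm_ipp_task_done(task, 0);
	}
	return IRQ_HANDLED;
}
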
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
deleted file mode 100644
index 29ec1c5efcf2..000000000000
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * Eunchul Kim <chulspro.kim@samsung.com>
- * Jinyoung Jeon <jy0.jeon@samsung.com>
- * Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_GSC_H_
-#define _EXYNOS_DRM_GSC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- * Mixer output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_GSC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
new file mode 100644
index 000000000000..26374e58c557
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Authors:
+ * Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Exynos DRM Image Post Processing (IPP) related functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_mode.h>
+#include <uapi/drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_ipp.h"
+
+static int num_ipp;
+static LIST_HEAD(ipp_list);
+
+/**
+ * exynos_drm_ipp_register - Register a new picture processor hardware module
+ * @dev: DRM device
+ * @ipp: ipp module to init
+ * @funcs: callbacks for the new ipp object
+ * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*)
+ * @formats: array of supported formats
+ * @num_formats: size of the supported formats array
+ * @name: name (for debugging purposes)
+ *
+ * Initializes an ipp module.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+ const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+ const struct exynos_drm_ipp_formats *formats,
+ unsigned int num_formats, const char *name)
+{
+ WARN_ON(!ipp);
+ WARN_ON(!funcs);
+ WARN_ON(!formats);
+ WARN_ON(!num_formats);
+
+ spin_lock_init(&ipp->lock);
+ INIT_LIST_HEAD(&ipp->todo_list);
+ init_waitqueue_head(&ipp->done_wq);
+ ipp->dev = dev;
+ ipp->funcs = funcs;
+ ipp->capabilities = caps;
+ ipp->name = name;
+ ipp->formats = formats;
+ ipp->num_formats = num_formats;
+
+ /* ipp_list modification is serialized by component framework */
+ list_add_tail(&ipp->head, &ipp_list);
+ ipp->id = num_ipp++;
+
+ DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id);
+
+ return 0;
+}
+
+/**
+ * exynos_drm_ipp_unregister - Unregister the picture processor module
+ * @dev: DRM device
+ * @ipp: ipp module
+ */
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+ struct exynos_drm_ipp *ipp)
+{
+ WARN_ON(ipp->task);
+ WARN_ON(!list_empty(&ipp->todo_list));
+ list_del(&ipp->head);
+}
+
+/**
+ * exynos_drm_ipp_get_res_ioctl - enumerate all ipp modules
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a list of ipp ids.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_res *resp = data;
+ struct exynos_drm_ipp *ipp;
+ uint32_t __user *ipp_ptr = (uint32_t __user *)
+ (unsigned long)resp->ipp_id_ptr;
+ unsigned int count = num_ipp, copied = 0;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (count && resp->count_ipps >= count) {
+ list_for_each_entry(ipp, &ipp_list, head) {
+ if (put_user(ipp->id, ipp_ptr + copied))
+ return -EFAULT;
+ copied++;
+ }
+ }
+ resp->count_ipps = count;
+
+ return 0;
+}
+
+static inline struct exynos_drm_ipp *__ipp_get(uint32_t id)
+{
+ struct exynos_drm_ipp *ipp;
+
+ list_for_each_entry(ipp, &ipp_list, head)
+ if (ipp->id == id)
+ return ipp;
+ return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_caps_ioctl - get ipp module capabilities and formats
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module capabilities.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_caps *resp = data;
+ void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr;
+ struct exynos_drm_ipp *ipp;
+ int i;
+
+ ipp = __ipp_get(resp->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ resp->ipp_id = ipp->id;
+ resp->capabilities = ipp->capabilities;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (resp->formats_count >= ipp->num_formats) {
+ for (i = 0; i < ipp->num_formats; i++) {
+ struct drm_exynos_ipp_format tmp = {
+ .fourcc = ipp->formats[i].fourcc,
+ .type = ipp->formats[i].type,
+ .modifier = ipp->formats[i].modifier,
+ };
+
+ if (copy_to_user(ptr, &tmp, sizeof(tmp)))
+ return -EFAULT;
+ ptr += sizeof(tmp);
+ }
+ }
+ resp->formats_count = ipp->num_formats;
+
+ return 0;
+}
+
+static inline const struct exynos_drm_ipp_formats *__ipp_format_get(
+ struct exynos_drm_ipp *ipp, uint32_t fourcc,
+ uint64_t mod, unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < ipp->num_formats; i++) {
+ if ((ipp->formats[i].type & type) &&
+ ipp->formats[i].fourcc == fourcc &&
+ ipp->formats[i].modifier == mod)
+ return &ipp->formats[i];
+ }
+ return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_limits_ioctl - get ipp module limits
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module limitations for provided
+ * picture format.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_limits *resp = data;
+ void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr;
+ const struct exynos_drm_ipp_formats *format;
+ struct exynos_drm_ipp *ipp;
+
+ if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE &&
+ resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+ return -EINVAL;
+
+ ipp = __ipp_get(resp->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ format = __ipp_format_get(ipp, resp->fourcc, resp->modifier,
+ resp->type);
+ if (!format)
+ return -EINVAL;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (format->num_limits && resp->limits_count >= format->num_limits)
+ if (copy_to_user((void __user *)ptr, format->limits,
+ sizeof(*format->limits) * format->num_limits))
+ return -EFAULT;
+ resp->limits_count = format->num_limits;
+
+ return 0;
+}
+
+struct drm_pending_exynos_ipp_event {
+ struct drm_pending_event base;
+ struct drm_exynos_ipp_event event;
+};
+
+static inline struct exynos_drm_ipp_task *
+ exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp)
+{
+ struct exynos_drm_ipp_task *task;
+
+ task = kzalloc(sizeof(*task), GFP_KERNEL);
+ if (!task)
+ return NULL;
+
+ task->dev = ipp->dev;
+ task->ipp = ipp;
+
+ /* some defaults */
+ task->src.rect.w = task->dst.rect.w = UINT_MAX;
+ task->src.rect.h = task->dst.rect.h = UINT_MAX;
+ task->transform.rotation = DRM_MODE_ROTATE_0;
+
+ DRM_DEBUG_DRIVER("Allocated task %pK\n", task);
+
+ return task;
+}
+
+static const struct exynos_drm_param_map {
+ unsigned int id;
+ unsigned int size;
+ unsigned int offset;
+} exynos_drm_ipp_params_maps[] = {
+ {
+ DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+ sizeof(struct drm_exynos_ipp_task_buffer),
+ offsetof(struct exynos_drm_ipp_task, src.buf),
+ }, {
+ DRM_EXYNOS_IPP_TASK_BUFFER |
+ DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+ sizeof(struct drm_exynos_ipp_task_buffer),
+ offsetof(struct exynos_drm_ipp_task, dst.buf),
+ }, {
+ DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+ sizeof(struct drm_exynos_ipp_task_rect),
+ offsetof(struct exynos_drm_ipp_task, src.rect),
+ }, {
+ DRM_EXYNOS_IPP_TASK_RECTANGLE |
+ DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+ sizeof(struct drm_exynos_ipp_task_rect),
+ offsetof(struct exynos_drm_ipp_task, dst.rect),
+ }, {
+ DRM_EXYNOS_IPP_TASK_TRANSFORM,
+ sizeof(struct drm_exynos_ipp_task_transform),
+ offsetof(struct exynos_drm_ipp_task, transform),
+ }, {
+ DRM_EXYNOS_IPP_TASK_ALPHA,
+ sizeof(struct drm_exynos_ipp_task_alpha),
+ offsetof(struct exynos_drm_ipp_task, alpha),
+ },
+};
+
+static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task,
+ struct drm_exynos_ioctl_ipp_commit *arg)
+{
+ const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps;
+ void __user *params = (void __user *)(unsigned long)arg->params_ptr;
+ unsigned int size = arg->params_size;
+ uint32_t id;
+ int i;
+
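+	/* params is a packed array of variable-sized structs, each starting with an id */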
+ while (size) {
+ if (get_user(id, (uint32_t __user *)params))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++)
+ if (map[i].id == id)
+ break;
+ if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) ||
+ map[i].size > size)
+ return -EINVAL;
+
+ if (copy_from_user((void *)task + map[i].offset, params,
+ map[i].size))
+ return -EFAULT;
+
+ params += map[i].size;
+ size -= map[i].size;
+ }
+
+ DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task);
+ return 0;
+}
+
+static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf,
+ struct drm_file *filp)
+{
+ int ret = 0;
+ int i;
+
+ /* basic checks */
+ if (buf->buf.width == 0 || buf->buf.height == 0)
+ return -EINVAL;
+ buf->format = drm_format_info(buf->buf.fourcc);
+ for (i = 0; i < buf->format->num_planes; i++) {
+ unsigned int width = (i == 0) ? buf->buf.width :
+ DIV_ROUND_UP(buf->buf.width, buf->format->hsub);
+
+ if (buf->buf.pitch[i] == 0)
+ buf->buf.pitch[i] = width * buf->format->cpp[i];
+ if (buf->buf.pitch[i] < width * buf->format->cpp[i])
+ return -EINVAL;
+ if (!buf->buf.gem_id[i])
+ return -ENOENT;
+ }
+
+ /* pitch for additional planes must match */
+ if (buf->format->num_planes > 2 &&
+ buf->buf.pitch[1] != buf->buf.pitch[2])
+ return -EINVAL;
+
+ /* get GEM buffers and check their size */
+ for (i = 0; i < buf->format->num_planes; i++) {
+ unsigned int height = (i == 0) ? buf->buf.height :
+ DIV_ROUND_UP(buf->buf.height, buf->format->vsub);
+ unsigned long size = height * buf->buf.pitch[i];
+ struct drm_gem_object *obj = drm_gem_object_lookup(filp,
+ buf->buf.gem_id[i]);
+ if (!obj) {
+ ret = -ENOENT;
+ goto gem_free;
+ }
+ buf->exynos_gem[i] = to_exynos_gem(obj);
+
+ if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) {
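+			/* release the reference taken for this plane too */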
+ i++;
+ ret = -EINVAL;
+ goto gem_free;
+ }
+ buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr +
+ buf->buf.offset[i];
+ }
+
+ return 0;
+gem_free:
+ while (i--) {
+ drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+ buf->exynos_gem[i] = NULL;
+ }
+ return ret;
+}
+
+static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf)
+{
+ int i;
+
+ if (!buf->exynos_gem[0])
+ return;
+ for (i = 0; i < buf->format->num_planes; i++)
+ drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+}
+
+static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ DRM_DEBUG_DRIVER("Freeing task %pK\n", task);
+
+ exynos_drm_ipp_task_release_buf(&task->src);
+ exynos_drm_ipp_task_release_buf(&task->dst);
+ if (task->event)
+ drm_event_cancel_free(ipp->dev, &task->event->base);
+ kfree(task);
+}
+
+struct drm_ipp_limit {
+ struct drm_exynos_ipp_limit_val h;
+ struct drm_exynos_ipp_limit_val v;
+};
+
+enum drm_ipp_size_id {
+ IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX
+};
+
+static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = {
+ [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+ [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+ [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+};
+
+static inline void __limit_set_val(unsigned int *ptr, unsigned int val)
+{
+ if (!*ptr)
+ *ptr = val;
+}
+
+static void __get_size_limit(const struct drm_exynos_ipp_limit *limits,
+ unsigned int num_limits, enum drm_ipp_size_id id,
+ struct drm_ipp_limit *res)
+{
+ const struct drm_exynos_ipp_limit *l = limits;
+ int i = 0;
+
+ memset(res, 0, sizeof(*res));
+ for (i = 0; limit_id_fallback[id][i]; i++)
+ for (l = limits; l - limits < num_limits; l++) {
+ if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) !=
+ DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) ||
+ ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) !=
+ limit_id_fallback[id][i]))
+ continue;
+ __limit_set_val(&res->h.min, l->h.min);
+ __limit_set_val(&res->h.max, l->h.max);
+ __limit_set_val(&res->h.align, l->h.align);
+ __limit_set_val(&res->v.min, l->v.min);
+ __limit_set_val(&res->v.max, l->v.max);
+ __limit_set_val(&res->v.align, l->v.align);
+ }
+}
+
+static inline bool __align_check(unsigned int val, unsigned int align)
+{
+ if (align && (val & (align - 1))) {
+ DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n",
+ val, align);
+ return false;
+ }
+ return true;
+}
+
+static inline bool __size_limit_check(unsigned int val,
+ struct drm_exynos_ipp_limit_val *l)
+{
+ if ((l->min && val < l->min) || (l->max && val > l->max)) {
+ DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n",
+ val, l->min, l->max);
+ return false;
+ }
+ return __align_check(val, l->align);
+}
+
+static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf,
+ const struct drm_exynos_ipp_limit *limits, unsigned int num_limits,
+ bool rotate, bool swap)
+{
+ enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA;
+ struct drm_ipp_limit l;
+ struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v;
+
+ if (!limits)
+ return 0;
+
+ __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l);
+ if (!__size_limit_check(buf->buf.width, &l.h) ||
+ !__size_limit_check(buf->buf.height, &l.v))
+ return -EINVAL;
+
+ if (swap) {
+ lv = &l.h;
+ lh = &l.v;
+ }
+ __get_size_limit(limits, num_limits, id, &l);
+ if (!__size_limit_check(buf->rect.w, lh) ||
+ !__align_check(buf->rect.x, lh->align) ||
+ !__size_limit_check(buf->rect.h, lv) ||
+ !__align_check(buf->rect.y, lv->align))
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline bool __scale_limit_check(unsigned int src, unsigned int dst,
+ unsigned int min, unsigned int max)
+{
+ if ((max && (dst << 16) > src * max) ||
+ (min && (dst << 16) < src * min)) {
+ DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n",
+ src, dst,
+ min >> 16, 100000 * (min & 0xffff) / (1 << 16),
+ max >> 16, 100000 * (max & 0xffff) / (1 << 16));
+ return false;
+ }
+ return true;
+}
+
+static int exynos_drm_ipp_check_scale_limits(
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst,
+ const struct drm_exynos_ipp_limit *limits,
+ unsigned int num_limits, bool swap)
+{
+ const struct drm_exynos_ipp_limit_val *lh, *lv;
+ int dw, dh;
+
+ for (; num_limits; limits++, num_limits--)
+ if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) ==
+ DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE)
+ break;
+ if (!num_limits)
+ return 0;
+
+ lh = (!swap) ? &limits->h : &limits->v;
+ lv = (!swap) ? &limits->v : &limits->h;
+ dw = (!swap) ? dst->w : dst->h;
+ dh = (!swap) ? dst->h : dst->w;
+
+ if (!__scale_limit_check(src->w, dw, lh->min, lh->max) ||
+ !__scale_limit_check(src->h, dh, lv->min, lv->max))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
+{
+ struct exynos_drm_ipp *ipp = task->ipp;
+ const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt;
+ struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+ unsigned int rotation = task->transform.rotation;
+ int ret = 0;
+ bool swap = drm_rotation_90_or_270(rotation);
+ bool rotate = (rotation != DRM_MODE_ROTATE_0);
+ bool scale = false;
+
+ DRM_DEBUG_DRIVER("Checking task %pK\n", task);
+
+ if (src->rect.w == UINT_MAX)
+ src->rect.w = src->buf.width;
+ if (src->rect.h == UINT_MAX)
+ src->rect.h = src->buf.height;
+ if (dst->rect.w == UINT_MAX)
+ dst->rect.w = dst->buf.width;
+ if (dst->rect.h == UINT_MAX)
+ dst->rect.h = dst->buf.height;
+
+ if (src->rect.x + src->rect.w > (src->buf.width) ||
+ src->rect.y + src->rect.h > (src->buf.height) ||
+ dst->rect.x + dst->rect.w > (dst->buf.width) ||
+ dst->rect.y + dst->rect.h > (dst->buf.height)) {
+ DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n",
+ task);
+ return -EINVAL;
+ }
+
+ if ((!swap && (src->rect.w != dst->rect.w ||
+ src->rect.h != dst->rect.h)) ||
+ (swap && (src->rect.w != dst->rect.h ||
+ src->rect.h != dst->rect.w)))
+ scale = true;
+
+ if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) &&
+ (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) &&
+ src->buf.fourcc != dst->buf.fourcc)) {
+ DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task);
+ return -EINVAL;
+ }
+
+ src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier,
+ DRM_EXYNOS_IPP_FORMAT_SOURCE);
+ if (!src_fmt) {
+ DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task);
+ return -EINVAL;
+ }
+ ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits,
+ src_fmt->num_limits,
+ rotate, false);
+ if (ret)
+ return ret;
+ ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+ src_fmt->limits,
+ src_fmt->num_limits, swap);
+ if (ret)
+ return ret;
+
+ dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier,
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION);
+ if (!dst_fmt) {
+ DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task);
+ return -EINVAL;
+ }
+ ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits,
+ dst_fmt->num_limits,
+ false, swap);
+ if (ret)
+ return ret;
+ ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+ dst_fmt->limits,
+ dst_fmt->num_limits, swap);
+ if (ret)
+ return ret;
+
+ DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task);
+
+ return ret;
+}
+
+static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task,
+ struct drm_file *filp)
+{
+ struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+ int ret = 0;
+
+ DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task);
+
+ ret = exynos_drm_ipp_task_setup_buffer(src, filp);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task);
+ return ret;
+ }
+ ret = exynos_drm_ipp_task_setup_buffer(dst, filp);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task);
+ return ret;
+ }
+
+ DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task);
+
+ return ret;
+}
+
+static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task,
+ struct drm_file *file_priv, uint64_t user_data)
+{
+ struct drm_pending_exynos_ipp_event *e = NULL;
+ int ret;
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ e->event.base.type = DRM_EXYNOS_IPP_EVENT;
+ e->event.base.length = sizeof(e->event);
+ e->event.user_data = user_data;
+
+ ret = drm_event_reserve_init(task->dev, file_priv, &e->base,
+ &e->event.base);
+ if (ret)
+ goto free;
+
+ task->event = e;
+ return 0;
+free:
+ kfree(e);
+ return ret;
+}
+
+static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task)
+{
+ struct timespec64 now;
+
+ ktime_get_ts64(&now);
+ task->event->event.tv_sec = now.tv_sec;
+ task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
+ task->event->event.sequence = atomic_inc_return(&task->ipp->sequence);
+
+ drm_send_event(task->dev, &task->event->base);
+}
+
+static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task)
+{
+ int ret = task->ret;
+
+ if (ret == 0 && task->event) {
+ exynos_drm_ipp_event_send(task);
+ /* ensure event won't be canceled on task free */
+ task->event = NULL;
+ }
+
+ exynos_drm_ipp_task_free(task->ipp, task);
+ return ret;
+}
+
+static void exynos_drm_ipp_cleanup_work(struct work_struct *work)
+{
+ struct exynos_drm_ipp_task *task = container_of(work,
+ struct exynos_drm_ipp_task, cleanup_work);
+
+ exynos_drm_ipp_task_cleanup(task);
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp);
+
+/**
+ * exynos_drm_ipp_task_done - finish given task and set return code
+ * @task: ipp task to finish
+ * @ret: error code or 0 if operation has been performed successfully
+ */
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret)
+{
+ struct exynos_drm_ipp *ipp = task->ipp;
+ unsigned long flags;
+
+ DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret);
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ if (ipp->task == task)
+ ipp->task = NULL;
+ task->flags |= DRM_EXYNOS_IPP_TASK_DONE;
+ task->ret = ret;
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ exynos_drm_ipp_next_task(ipp);
+ wake_up(&ipp->done_wq);
+
+ if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) {
+ INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work);
+ schedule_work(&task->cleanup_work);
+ }
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp)
+{
+ struct exynos_drm_ipp_task *task;
+ unsigned long flags;
+ int ret;
+
+ DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id);
+
+ spin_lock_irqsave(&ipp->lock, flags);
+
+ if (ipp->task || list_empty(&ipp->todo_list)) {
+ spin_unlock_irqrestore(&ipp->lock, flags);
+ return;
+ }
+
+ task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task,
+ head);
+ list_del_init(&task->head);
+ ipp->task = task;
+
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task);
+
+ ret = ipp->funcs->commit(ipp, task);
+ if (ret)
+ exynos_drm_ipp_task_done(task, ret);
+}
+
+static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ list_add(&task->head, &ipp->todo_list);
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ exynos_drm_ipp_next_task(ipp);
+}
+
+static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) {
+ /* already completed task */
+ exynos_drm_ipp_task_cleanup(task);
+ } else if (ipp->task != task) {
+ /* task has not been scheduled for execution yet */
+ list_del_init(&task->head);
+ exynos_drm_ipp_task_cleanup(task);
+ } else {
+ /*
+ * currently processed task, call abort() and perform
+ * cleanup with async worker
+ */
+ task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+ spin_unlock_irqrestore(&ipp->lock, flags);
+ if (ipp->funcs->abort)
+ ipp->funcs->abort(ipp, task);
+ return;
+ }
+ spin_unlock_irqrestore(&ipp->lock, flags);
+}
+
+/**
+ * exynos_drm_ipp_commit_ioctl - perform image processing operation
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct an ipp task from the set of properties provided by the user
+ * and try to schedule it to framebuffer processor hardware.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_commit *arg = data;
+ struct exynos_drm_ipp *ipp;
+ struct exynos_drm_ipp_task *task;
+ int ret = 0;
+
+ if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved)
+ return -EINVAL;
+
+ /* can't test and expect an event at the same time */
+ if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) &&
+ (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT))
+ return -EINVAL;
+
+ ipp = __ipp_get(arg->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ task = exynos_drm_ipp_task_alloc(ipp);
+ if (!task)
+ return -ENOMEM;
+
+ ret = exynos_drm_ipp_task_set(task, arg);
+ if (ret)
+ goto free;
+
+ ret = exynos_drm_ipp_task_check(task);
+ if (ret)
+ goto free;
+
+ ret = exynos_drm_ipp_task_setup_buffers(task, file_priv);
+ if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY)
+ goto free;
+
+ if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) {
+ ret = exynos_drm_ipp_event_create(task, file_priv,
+ arg->user_data);
+ if (ret)
+ goto free;
+ }
+
+ /*
+	 * Queue the task for processing on the hardware. The task object will
+	 * then be freed after exynos_drm_ipp_task_done().
+ */
+ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) {
+ DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n",
+ ipp->id, task);
+
+ task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+ exynos_drm_ipp_schedule_task(task->ipp, task);
+ ret = 0;
+ } else {
+ DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id,
+ task);
+ exynos_drm_ipp_schedule_task(ipp, task);
+ ret = wait_event_interruptible(ipp->done_wq,
+ task->flags & DRM_EXYNOS_IPP_TASK_DONE);
+ if (ret)
+ exynos_drm_ipp_task_abort(ipp, task);
+ else
+ ret = exynos_drm_ipp_task_cleanup(task);
+ }
+ return ret;
+free:
+ exynos_drm_ipp_task_free(ipp, task);
+
+ return ret;
+}
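
From user space, the resource/caps/limits ioctls implemented above follow the usual two-pass DRM pattern: call once with a zero count so the kernel reports how many entries it has, allocate a buffer, then call again to have it filled. A rough sketch for enumerating the registered modules, assuming the entry point is exposed as DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES (check uapi/drm/exynos_drm.h for the exact ioctl name):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>	/* kernel uapi header */

/* fd is an open node of the exynos DRM device */
static int list_ipp_modules(int fd)
{
	struct drm_exynos_ioctl_ipp_get_res res = { 0 };
	uint32_t *ids;
	unsigned int i;

	/* first call: count_ipps == 0, the kernel only reports the count */
	if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res))
		return -1;
	if (!res.count_ipps)
		return 0;

	ids = calloc(res.count_ipps, sizeof(*ids));
	if (!ids)
		return -1;

	/* second call: provide the buffer, the kernel fills in the ids */
	res.ipp_id_ptr = (uintptr_t)ids;
	if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res)) {
		free(ids);
		return -1;
	}

	for (i = 0; i < res.count_ipps; i++)
		printf("ipp module %u\n", ids[i]);

	free(ids);
	return 0;
}
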
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
new file mode 100644
index 000000000000..0b27d4a9bf94
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef _EXYNOS_DRM_IPP_H_
+#define _EXYNOS_DRM_IPP_H_
+
+#include <drm/drmP.h>
+
+struct exynos_drm_ipp;
+struct exynos_drm_ipp_task;
+
+/**
+ * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions
+ */
+struct exynos_drm_ipp_funcs {
+ /**
+ * @commit:
+ *
+ * This is the main entry point to start framebuffer processing
+	 * in the hardware. The exynos_drm_ipp_task has already been validated.
+	 * This function must not wait until the device finishes processing.
+	 * When the driver finishes processing, it has to call the
+	 * exynos_drm_ipp_task_done() function.
+ *
+ * RETURNS:
+ *
+ * 0 on success or negative error codes in case of failure.
+ */
+ int (*commit)(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task);
+
+ /**
+ * @abort:
+ *
+ * Informs the driver that it has to abort the currently running
+ * task as soon as possible (i.e. as soon as it can stop the device
+ * safely), even if the task would not have been finished by then.
+ * After the driver performs the necessary steps, it has to call
+ * exynos_drm_ipp_task_done() (as if the task ended normally).
+ * This function does not have to (and will usually not) wait
+ * until the device enters a state when it can be stopped.
+ */
+ void (*abort)(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task);
+};
+
+/**
+ * struct exynos_drm_ipp - central picture processor module structure
+ */
+struct exynos_drm_ipp {
+ struct drm_device *dev;
+ struct list_head head;
+ unsigned int id;
+
+ const char *name;
+ const struct exynos_drm_ipp_funcs *funcs;
+ unsigned int capabilities;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+ atomic_t sequence;
+
+ spinlock_t lock;
+ struct exynos_drm_ipp_task *task;
+ struct list_head todo_list;
+ wait_queue_head_t done_wq;
+};
+
+struct exynos_drm_ipp_buffer {
+ struct drm_exynos_ipp_task_buffer buf;
+ struct drm_exynos_ipp_task_rect rect;
+
+ struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
+ const struct drm_format_info *format;
+ dma_addr_t dma_addr[MAX_FB_BUFFER];
+};
+
+/**
+ * struct exynos_drm_ipp_task - a structure describing a transformation to
+ * be performed by the picture processor hardware module
+ */
+struct exynos_drm_ipp_task {
+ struct drm_device *dev;
+ struct exynos_drm_ipp *ipp;
+ struct list_head head;
+
+ struct exynos_drm_ipp_buffer src;
+ struct exynos_drm_ipp_buffer dst;
+
+ struct drm_exynos_ipp_task_transform transform;
+ struct drm_exynos_ipp_task_alpha alpha;
+
+ struct work_struct cleanup_work;
+ unsigned int flags;
+ int ret;
+
+ struct drm_pending_exynos_ipp_event *event;
+};
+
+#define DRM_EXYNOS_IPP_TASK_DONE (1 << 0)
+#define DRM_EXYNOS_IPP_TASK_ASYNC (1 << 1)
+
+struct exynos_drm_ipp_formats {
+ uint32_t fourcc;
+ uint32_t type;
+ uint64_t modifier;
+ const struct drm_exynos_ipp_limit *limits;
+ unsigned int num_limits;
+};
+
+/* helper macros to set exynos_drm_ipp_formats structure and limits */
+#define IPP_SRCDST_MFORMAT(f, m, l) \
+ .fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \
+ .num_limits = ARRAY_SIZE(l), \
+ .type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+
+#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l)
+
+#define IPP_SIZE_LIMIT(l, val...) \
+ .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \
+ DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val
+
+#define IPP_SCALE_LIMIT(val...) \
+ .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val
+
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+ const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+ const struct exynos_drm_ipp_formats *formats,
+ unsigned int num_formats, const char *name);
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+ struct exynos_drm_ipp *ipp);
+
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret);
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv);
+#else
+static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_res *resp = data;
+
+ resp->count_ipps = 0;
+ return 0;
+}
+static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+#endif
+#endif
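
The helpers declared above are meant to keep per-SoC capabilities in tables rather than in code: a driver describes its size and scale constraints with IPP_SIZE_LIMIT()/IPP_SCALE_LIMIT(), ties them to pixel formats with IPP_SRCDST_FORMAT(), and hands the table to exynos_drm_ipp_register() from its component bind callback. A minimal sketch, reusing the hypothetical foo_context and foo_ipp_funcs from the sketch after the GSC diff above; the limit values are made up for illustration:

#include <linux/device.h>
#include <drm/drm_fourcc.h>

#include "exynos_drm_ipp.h"

static const struct drm_exynos_ipp_limit foo_limits[] = {
	{ IPP_SIZE_LIMIT(AREA, .h = { 8, 4096, 2 }, .v = { 8, 4096, 2 }) },
	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 4, (1 << 16) * 4 },
			  .v = { (1 << 16) / 4, (1 << 16) * 4 }) },
};

static const struct exynos_drm_ipp_formats foo_formats[] = {
	{ IPP_SRCDST_FORMAT(XRGB8888, foo_limits) },
	{ IPP_SRCDST_FORMAT(NV12, foo_limits) },
};

static int foo_bind(struct device *dev, struct device *master, void *data)
{
	struct foo_context *ctx = dev_get_drvdata(dev);
	struct drm_device *drm_dev = data;

	/* foo_ipp_funcs provides at least the mandatory .commit callback */
	exynos_drm_ipp_register(drm_dev, &ctx->ipp, &foo_ipp_funcs,
				DRM_EXYNOS_IPP_CAP_CROP |
				DRM_EXYNOS_IPP_CAP_SCALE,
				foo_formats, ARRAY_SIZE(foo_formats), "foo");
	return 0;
}
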
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 79282a820ecc..1a76dd3d52e1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -22,29 +23,18 @@
#include <drm/exynos_drm.h>
#include "regs-rotator.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
/*
* Rotator supports image crop/rotator and input/output DMA operations.
* input DMA reads image data from the memory.
* output DMA writes image data to memory.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> Rotator H/W ----> Memory.
*/
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. need to add supported list in prop_list.
- */
+#define ROTATOR_AUTOSUSPEND_DELAY 2000
-#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct rot_context, ippdrv);
-#define rot_read(offset) readl(rot->regs + (offset))
+#define rot_read(offset) readl(rot->regs + (offset))
#define rot_write(cfg, offset) writel(cfg, rot->regs + (offset))
enum rot_irq_status {
@@ -52,54 +42,28 @@ enum rot_irq_status {
ROT_IRQ_STATUS_ILLEGAL = 9,
};
-/*
- * A structure of limitation.
- *
- * @min_w: minimum width.
- * @min_h: minimum height.
- * @max_w: maximum width.
- * @max_h: maximum height.
- * @align: align size.
- */
-struct rot_limit {
- u32 min_w;
- u32 min_h;
- u32 max_w;
- u32 max_h;
- u32 align;
-};
-
-/*
- * A structure of limitation table.
- *
- * @ycbcr420_2p: case of YUV.
- * @rgb888: case of RGB.
- */
-struct rot_limit_table {
- struct rot_limit ycbcr420_2p;
- struct rot_limit rgb888;
+struct rot_variant {
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
};
/*
* A structure of rotator context.
- * @ippdrv: prepare initialization using ippdrv.
- * @regs_res: register resources.
* @regs: memory mapped io registers.
* @clock: rotator gate clock.
- * @limit_tbl: limitation of rotator.
- * @irq: irq number.
- * @cur_buf_id: current operation buffer id.
- * @suspended: suspended state.
*/
struct rot_context {
- struct exynos_drm_ippdrv ippdrv;
- struct resource *regs_res;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
void __iomem *regs;
struct clk *clock;
- struct rot_limit_table *limit_tbl;
- int irq;
- int cur_buf_id[EXYNOS_DRM_OPS_MAX];
- bool suspended;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+ struct exynos_drm_ipp_task *task;
};
static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
@@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
rot_write(val, ROT_CONFIG);
}
-static u32 rotator_reg_get_fmt(struct rot_context *rot)
-{
- u32 val = rot_read(ROT_CONTROL);
-
- val &= ROT_CONTROL_FMT_MASK;
-
- return val;
-}
-
static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
{
u32 val = rot_read(ROT_STATUS);
@@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
static irqreturn_t rotator_irq_handler(int irq, void *arg)
{
struct rot_context *rot = arg;
- struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work = c_node->event_work;
enum rot_irq_status irq_status;
u32 val;
@@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg)
val |= ROT_STATUS_IRQ_PENDING((u32)irq_status);
rot_write(val, ROT_STATUS);
- if (irq_status == ROT_IRQ_STATUS_COMPLETE) {
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_DST] =
- rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
- queue_work(ippdrv->event_workq, &event_work->work);
- } else {
- DRM_ERROR("the SFR is set illegally\n");
+ if (rot->task) {
+ struct exynos_drm_ipp_task *task = rot->task;
+
+ rot->task = NULL;
+ pm_runtime_mark_last_busy(rot->dev);
+ pm_runtime_put_autosuspend(rot->dev);
+ exynos_drm_ipp_task_done(task,
+ irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL);
}
return IRQ_HANDLED;
}
-static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize,
- u32 *vsize)
+static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt)
{
- struct rot_limit_table *limit_tbl = rot->limit_tbl;
- struct rot_limit *limit;
- u32 mask, val;
-
- /* Get size limit */
- if (fmt == ROT_CONTROL_FMT_RGB888)
- limit = &limit_tbl->rgb888;
- else
- limit = &limit_tbl->ycbcr420_2p;
-
- /* Get mask for rounding to nearest aligned val */
- mask = ~((1 << limit->align) - 1);
-
- /* Set aligned width */
- val = ROT_ALIGN(*hsize, limit->align, mask);
- if (val < limit->min_w)
- *hsize = ROT_MIN(limit->min_w, mask);
- else if (val > limit->max_w)
- *hsize = ROT_MAX(limit->max_w, mask);
- else
- *hsize = val;
-
- /* Set aligned height */
- val = ROT_ALIGN(*vsize, limit->align, mask);
- if (val < limit->min_h)
- *vsize = ROT_MIN(limit->min_h, mask);
- else if (val > limit->max_h)
- *vsize = ROT_MAX(limit->max_h, mask);
- else
- *vsize = val;
-}
-
-static int rotator_src_set_fmt(struct device *dev, u32 fmt)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
u32 val;
val = rot_read(ROT_CONTROL);
@@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_XRGB8888:
val |= ROT_CONTROL_FMT_RGB888;
break;
- default:
- DRM_ERROR("invalid image format\n");
- return -EINVAL;
}
rot_write(val, ROT_CONTROL);
-
- return 0;
}
-static inline bool rotator_check_reg_fmt(u32 fmt)
+static void rotator_src_set_buf(struct rot_context *rot,
+ struct exynos_drm_ipp_buffer *buf)
{
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) ||
- (fmt == ROT_CONTROL_FMT_RGB888))
- return true;
-
- return false;
-}
-
-static int rotator_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos,
- struct drm_exynos_sz *sz)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
- u32 fmt, hsize, vsize;
u32 val;
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Align buffer size */
- hsize = sz->hsize;
- vsize = sz->vsize;
- rotator_align_size(rot, fmt, &hsize, &vsize);
-
/* Set buffer size configuration */
- val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+ val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+ ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
rot_write(val, ROT_SRC_BUF_SIZE);
/* Set crop image position configuration */
- val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+ val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
rot_write(val, ROT_SRC_CROP_POS);
- val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w);
+ val = ROT_SRC_CROP_SIZE_H(buf->rect.h) |
+ ROT_SRC_CROP_SIZE_W(buf->rect.w);
rot_write(val, ROT_SRC_CROP_SIZE);
- return 0;
+ /* Set buffer DMA address */
+ rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0));
+ rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1));
}
-static int rotator_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info,
- u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_dst_set_transf(struct rot_context *rot,
+ unsigned int rotation)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
- u32 val, fmt, hsize, vsize;
- int i;
-
- /* Set current buf_id */
- rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id;
-
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- /* Set address configuration */
- for_each_ipp_planar(i)
- addr[i] = buf_info->base[i];
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Re-set cb planar for NV12 format */
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
- !addr[EXYNOS_DRM_PLANAR_CB]) {
-
- val = rot_read(ROT_SRC_BUF_SIZE);
- hsize = ROT_GET_BUF_SIZE_W(val);
- vsize = ROT_GET_BUF_SIZE_H(val);
-
- /* Set cb planar */
- addr[EXYNOS_DRM_PLANAR_CB] =
- addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
- }
-
- for_each_ipp_planar(i)
- rot_write(addr[i], ROT_SRC_BUF_ADDR(i));
- break;
- case IPP_BUF_DEQUEUE:
- for_each_ipp_planar(i)
- rot_write(0x0, ROT_SRC_BUF_ADDR(i));
- break;
- default:
- /* Nothing to do */
- break;
- }
-
- return 0;
-}
-
-static int rotator_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
u32 val;
/* Set transform configuration */
val = rot_read(ROT_CONTROL);
val &= ~ROT_CONTROL_FLIP_MASK;
- switch (flip) {
- case EXYNOS_DRM_FLIP_VERTICAL:
- val |= ROT_CONTROL_FLIP_VERTICAL;
- break;
- case EXYNOS_DRM_FLIP_HORIZONTAL:
+ if (rotation & DRM_MODE_REFLECT_X)
val |= ROT_CONTROL_FLIP_HORIZONTAL;
- break;
- default:
- /* Flip None */
- break;
- }
+ if (rotation & DRM_MODE_REFLECT_Y)
+ val |= ROT_CONTROL_FLIP_VERTICAL;
val &= ~ROT_CONTROL_ROT_MASK;
- switch (degree) {
- case EXYNOS_DRM_DEGREE_90:
+ if (rotation & DRM_MODE_ROTATE_90)
val |= ROT_CONTROL_ROT_90;
- break;
- case EXYNOS_DRM_DEGREE_180:
+ else if (rotation & DRM_MODE_ROTATE_180)
val |= ROT_CONTROL_ROT_180;
- break;
- case EXYNOS_DRM_DEGREE_270:
+ else if (rotation & DRM_MODE_ROTATE_270)
val |= ROT_CONTROL_ROT_270;
- break;
- default:
- /* Rotation 0 Degree */
- break;
- }
rot_write(val, ROT_CONTROL);
-
- /* Check degree for setting buffer size swap */
- if ((degree == EXYNOS_DRM_DEGREE_90) ||
- (degree == EXYNOS_DRM_DEGREE_270))
- *swap = true;
- else
- *swap = false;
-
- return 0;
}
-static int rotator_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos,
- struct drm_exynos_sz *sz)
+static void rotator_dst_set_buf(struct rot_context *rot,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- u32 val, fmt, hsize, vsize;
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Align buffer size */
- hsize = sz->hsize;
- vsize = sz->vsize;
- rotator_align_size(rot, fmt, &hsize, &vsize);
+ u32 val;
/* Set buffer size configuration */
- val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+ val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+ ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
rot_write(val, ROT_DST_BUF_SIZE);
/* Set crop image position configuration */
- val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+ val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
rot_write(val, ROT_DST_CROP_POS);
- return 0;
+ /* Set buffer DMA address */
+ rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0));
+ rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1));
}
-static int rotator_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info,
- u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_start(struct rot_context *rot)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
- u32 val, fmt, hsize, vsize;
- int i;
-
- /* Set current buf_id */
- rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
-
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- /* Set address configuration */
- for_each_ipp_planar(i)
- addr[i] = buf_info->base[i];
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Re-set cb planar for NV12 format */
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
- !addr[EXYNOS_DRM_PLANAR_CB]) {
- /* Get buf size */
- val = rot_read(ROT_DST_BUF_SIZE);
-
- hsize = ROT_GET_BUF_SIZE_W(val);
- vsize = ROT_GET_BUF_SIZE_H(val);
-
- /* Set cb planar */
- addr[EXYNOS_DRM_PLANAR_CB] =
- addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
- }
-
- for_each_ipp_planar(i)
- rot_write(addr[i], ROT_DST_BUF_ADDR(i));
- break;
- case IPP_BUF_DEQUEUE:
- for_each_ipp_planar(i)
- rot_write(0x0, ROT_DST_BUF_ADDR(i));
- break;
- default:
- /* Nothing to do */
- break;
- }
+ u32 val;
- return 0;
+ /* Set interrupt enable */
+ rotator_reg_set_irq(rot, true);
+
+ val = rot_read(ROT_CONTROL);
+ val |= ROT_CONTROL_START;
+ rot_write(val, ROT_CONTROL);
}
-static struct exynos_drm_ipp_ops rot_src_ops = {
- .set_fmt = rotator_src_set_fmt,
- .set_size = rotator_src_set_size,
- .set_addr = rotator_src_set_addr,
-};
+static int rotator_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct rot_context *rot =
+ container_of(ipp, struct rot_context, ipp);
-static struct exynos_drm_ipp_ops rot_dst_ops = {
- .set_transf = rotator_dst_set_transf,
- .set_size = rotator_dst_set_size,
- .set_addr = rotator_dst_set_addr,
-};
+ pm_runtime_get_sync(rot->dev);
+ rot->task = task;
-static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 0;
- prop_list->crop = 0;
- prop_list->scale = 0;
+ rotator_src_set_fmt(rot, task->src.buf.fourcc);
+ rotator_src_set_buf(rot, &task->src);
+ rotator_dst_set_transf(rot, task->transform.rotation);
+ rotator_dst_set_buf(rot, &task->dst);
+ rotator_start(rot);
return 0;
}
-static inline bool rotator_check_drm_fmt(u32 fmt)
-{
- switch (fmt) {
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_NV12:
- return true;
- default:
- DRM_DEBUG_KMS("not support format\n");
- return false;
- }
-}
-
-static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = rotator_commit,
+};
-static int rotator_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
+static int rotator_bind(struct device *dev, struct device *master, void *data)
{
- struct drm_exynos_ipp_config *src_config =
- &property->config[EXYNOS_DRM_OPS_SRC];
- struct drm_exynos_ipp_config *dst_config =
- &property->config[EXYNOS_DRM_OPS_DST];
- struct drm_exynos_pos *src_pos = &src_config->pos;
- struct drm_exynos_pos *dst_pos = &dst_config->pos;
- struct drm_exynos_sz *src_sz = &src_config->sz;
- struct drm_exynos_sz *dst_sz = &dst_config->sz;
- bool swap = false;
-
- /* Check format configuration */
- if (src_config->fmt != dst_config->fmt) {
- DRM_DEBUG_KMS("not support csc feature\n");
- return -EINVAL;
- }
-
- if (!rotator_check_drm_fmt(dst_config->fmt)) {
- DRM_DEBUG_KMS("invalid format\n");
- return -EINVAL;
- }
-
- /* Check transform configuration */
- if (src_config->degree != EXYNOS_DRM_DEGREE_0) {
- DRM_DEBUG_KMS("not support source-side rotation\n");
- return -EINVAL;
- }
-
- switch (dst_config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- /* No problem */
- break;
- default:
- DRM_DEBUG_KMS("invalid degree\n");
- return -EINVAL;
- }
-
- if (src_config->flip != EXYNOS_DRM_FLIP_NONE) {
- DRM_DEBUG_KMS("not support source-side flip\n");
- return -EINVAL;
- }
+ struct rot_context *rot = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &rot->ipp;
- if (!rotator_check_drm_flip(dst_config->flip)) {
- DRM_DEBUG_KMS("invalid flip\n");
- return -EINVAL;
- }
+ rot->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
- /* Check size configuration */
- if ((src_pos->x + src_pos->w > src_sz->hsize) ||
- (src_pos->y + src_pos->h > src_sz->vsize)) {
- DRM_DEBUG_KMS("out of source buffer bound\n");
- return -EINVAL;
- }
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE,
+ rot->formats, rot->num_formats, "rotator");
- if (swap) {
- if ((dst_pos->x + dst_pos->h > dst_sz->vsize) ||
- (dst_pos->y + dst_pos->w > dst_sz->hsize)) {
- DRM_DEBUG_KMS("out of destination buffer bound\n");
- return -EINVAL;
- }
-
- if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) {
- DRM_DEBUG_KMS("not support scale feature\n");
- return -EINVAL;
- }
- } else {
- if ((dst_pos->x + dst_pos->w > dst_sz->hsize) ||
- (dst_pos->y + dst_pos->h > dst_sz->vsize)) {
- DRM_DEBUG_KMS("out of destination buffer bound\n");
- return -EINVAL;
- }
-
- if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) {
- DRM_DEBUG_KMS("not support scale feature\n");
- return -EINVAL;
- }
- }
+ dev_info(dev, "The exynos rotator has been probed successfully\n");
return 0;
}
-static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void rotator_unbind(struct device *dev, struct device *master,
+ void *data)
{
struct rot_context *rot = dev_get_drvdata(dev);
- u32 val;
-
- if (rot->suspended) {
- DRM_ERROR("suspended state\n");
- return -EPERM;
- }
-
- if (cmd != IPP_CMD_M2M) {
- DRM_ERROR("not support cmd: %d\n", cmd);
- return -EINVAL;
- }
-
- /* Set interrupt enable */
- rotator_reg_set_irq(rot, true);
-
- val = rot_read(ROT_CONTROL);
- val |= ROT_CONTROL_START;
-
- rot_write(val, ROT_CONTROL);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &rot->ipp;
- return 0;
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(rot->drm_dev, rot->dev);
}
-static struct rot_limit_table rot_limit_tbl_4210 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_64K,
- .max_h = SZ_64K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_16K,
- .max_h = SZ_16K,
- .align = 2,
- },
-};
-
-static struct rot_limit_table rot_limit_tbl_4x12 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_32K,
- .max_h = SZ_32K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_8K,
- .max_h = SZ_8K,
- .align = 2,
- },
+static const struct component_ops rotator_component_ops = {
+ .bind = rotator_bind,
+ .unbind = rotator_unbind,
};
-static struct rot_limit_table rot_limit_tbl_5250 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_32K,
- .max_h = SZ_32K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_8K,
- .max_h = SZ_8K,
- .align = 1,
- },
-};
-
-static const struct of_device_id exynos_rotator_match[] = {
- {
- .compatible = "samsung,exynos4210-rotator",
- .data = &rot_limit_tbl_4210,
- },
- {
- .compatible = "samsung,exynos4212-rotator",
- .data = &rot_limit_tbl_4x12,
- },
- {
- .compatible = "samsung,exynos5250-rotator",
- .data = &rot_limit_tbl_5250,
- },
- {},
-};
-MODULE_DEVICE_TABLE(of, exynos_rotator_match);
-
static int rotator_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct resource *regs_res;
struct rot_context *rot;
- struct exynos_drm_ippdrv *ippdrv;
+ const struct rot_variant *variant;
+ int irq;
int ret;
- if (!dev->of_node) {
- dev_err(dev, "cannot find of_node.\n");
- return -ENODEV;
- }
-
rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL);
if (!rot)
return -ENOMEM;
- rot->limit_tbl = (struct rot_limit_table *)
- of_device_get_match_data(dev);
- rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- rot->regs = devm_ioremap_resource(dev, rot->regs_res);
+ variant = of_device_get_match_data(dev);
+ rot->formats = variant->formats;
+ rot->num_formats = variant->num_formats;
+ rot->dev = dev;
+ regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ rot->regs = devm_ioremap_resource(dev, regs_res);
if (IS_ERR(rot->regs))
return PTR_ERR(rot->regs);
- rot->irq = platform_get_irq(pdev, 0);
- if (rot->irq < 0) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
dev_err(dev, "failed to get irq\n");
- return rot->irq;
+ return irq;
}
- ret = devm_request_threaded_irq(dev, rot->irq, NULL,
- rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot);
+ ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev),
+ rot);
if (ret < 0) {
dev_err(dev, "failed to request irq\n");
return ret;
@@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev)
return PTR_ERR(rot->clock);
}
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
-
- ippdrv = &rot->ippdrv;
- ippdrv->dev = dev;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops;
- ippdrv->check_property = rotator_ippdrv_check_property;
- ippdrv->start = rotator_ippdrv_start;
- ret = rotator_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- goto err_ippdrv_register;
- }
-
- DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv);
-
platform_set_drvdata(pdev, rot);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm rotator device\n");
- goto err_ippdrv_register;
- }
-
- dev_info(dev, "The exynos rotator is probed successfully\n");
+ ret = component_add(dev, &rotator_component_ops);
+ if (ret)
+ goto err_component;
return 0;
-err_ippdrv_register:
+err_component:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return ret;
}
@@ -770,45 +332,101 @@ err_ippdrv_register:
static int rotator_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct rot_context *rot = dev_get_drvdata(dev);
- struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
-
- exynos_drm_ippdrv_unregister(ippdrv);
+ component_del(dev, &rotator_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return 0;
}
#ifdef CONFIG_PM
-static int rotator_clk_crtl(struct rot_context *rot, bool enable)
-{
- if (enable) {
- clk_prepare_enable(rot->clock);
- rot->suspended = false;
- } else {
- clk_disable_unprepare(rot->clock);
- rot->suspended = true;
- }
-
- return 0;
-}
-
static int rotator_runtime_suspend(struct device *dev)
{
struct rot_context *rot = dev_get_drvdata(dev);
- return rotator_clk_crtl(rot, false);
+ clk_disable_unprepare(rot->clock);
+ return 0;
}
static int rotator_runtime_resume(struct device *dev)
{
struct rot_context *rot = dev_get_drvdata(dev);
- return rotator_clk_crtl(rot, true);
+ return clk_prepare_enable(rot->clock);
}
#endif
+static const struct drm_exynos_ipp_limit rotator_4210_rbg888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_rbg888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_5250_rbg888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
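+/*
+ * Note: the .align values above appear to correspond to what the old limit
+ * tables encoded as a power-of-two exponent, e.g. the previous .align = 3
+ * for YCbCr 4:2:0 becomes an alignment of 8 here (an observation, not stated
+ * in the source).
+ */
+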
+static const struct exynos_drm_ipp_formats rotator_4210_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rbg888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4412_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rbg888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_5250_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rbg888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct rot_variant rotator_4210_data = {
+ .formats = rotator_4210_formats,
+ .num_formats = ARRAY_SIZE(rotator_4210_formats),
+};
+
+static const struct rot_variant rotator_4412_data = {
+ .formats = rotator_4412_formats,
+ .num_formats = ARRAY_SIZE(rotator_4412_formats),
+};
+
+static const struct rot_variant rotator_5250_data = {
+ .formats = rotator_5250_formats,
+ .num_formats = ARRAY_SIZE(rotator_5250_formats),
+};
+
+static const struct of_device_id exynos_rotator_match[] = {
+ {
+ .compatible = "samsung,exynos4210-rotator",
+ .data = &rotator_4210_data,
+ }, {
+ .compatible = "samsung,exynos4212-rotator",
+ .data = &rotator_4412_data,
+ }, {
+ .compatible = "samsung,exynos5250-rotator",
+ .data = &rotator_5250_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, exynos_rotator_match);
+
static const struct dev_pm_ops rotator_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
pm_runtime_force_resume)
@@ -820,7 +438,7 @@ struct platform_driver rotator_driver = {
.probe = rotator_probe,
.remove = rotator_remove,
.driver = {
- .name = "exynos-rot",
+ .name = "exynos-rotator",
.owner = THIS_MODULE,
.pm = &rotator_pm_ops,
.of_match_table = exynos_rotator_match,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
new file mode 100644
index 000000000000..63b05b7c846a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Author:
+ * Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/component.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-scaler.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
+#include "exynos_drm_ipp.h"
+
+#define scaler_read(offset) readl(scaler->regs + (offset))
+#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset))
+#define SCALER_MAX_CLK 4
+#define SCALER_AUTOSUSPEND_DELAY 2000
+
+struct scaler_data {
+ const char *clk_name[SCALER_MAX_CLK];
+ unsigned int num_clk;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+};
+
+struct scaler_context {
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ void __iomem *regs;
+ struct clk *clock[SCALER_MAX_CLK];
+ struct exynos_drm_ipp_task *task;
+ const struct scaler_data *scaler_data;
+};
+
+static u32 scaler_get_format(u32 drm_fmt)
+{
+ switch (drm_fmt) {
+ case DRM_FORMAT_NV21:
+ return SCALER_YUV420_2P_UV;
+ case DRM_FORMAT_NV12:
+ return SCALER_YUV420_2P_VU;
+ case DRM_FORMAT_YUV420:
+ return SCALER_YUV420_3P;
+ case DRM_FORMAT_YUYV:
+ return SCALER_YUV422_1P_YUYV;
+ case DRM_FORMAT_UYVY:
+ return SCALER_YUV422_1P_UYVY;
+ case DRM_FORMAT_YVYU:
+ return SCALER_YUV422_1P_YVYU;
+ case DRM_FORMAT_NV61:
+ return SCALER_YUV422_2P_UV;
+ case DRM_FORMAT_NV16:
+ return SCALER_YUV422_2P_VU;
+ case DRM_FORMAT_YUV422:
+ return SCALER_YUV422_3P;
+ case DRM_FORMAT_NV42:
+ return SCALER_YUV444_2P_UV;
+ case DRM_FORMAT_NV24:
+ return SCALER_YUV444_2P_VU;
+ case DRM_FORMAT_YUV444:
+ return SCALER_YUV444_3P;
+ case DRM_FORMAT_RGB565:
+ return SCALER_RGB_565;
+ case DRM_FORMAT_XRGB1555:
+ return SCALER_ARGB1555;
+ case DRM_FORMAT_ARGB1555:
+ return SCALER_ARGB1555;
+ case DRM_FORMAT_XRGB4444:
+ return SCALER_ARGB4444;
+ case DRM_FORMAT_ARGB4444:
+ return SCALER_ARGB4444;
+ case DRM_FORMAT_XRGB8888:
+ return SCALER_ARGB8888;
+ case DRM_FORMAT_ARGB8888:
+ return SCALER_ARGB8888;
+ case DRM_FORMAT_RGBX8888:
+ return SCALER_RGBA8888;
+ case DRM_FORMAT_RGBA8888:
+ return SCALER_RGBA8888;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static inline void scaler_enable_int(struct scaler_context *scaler)
+{
+ u32 val;
+
+ val = SCALER_INT_EN_TIMEOUT |
+ SCALER_INT_EN_ILLEGAL_BLEND |
+ SCALER_INT_EN_ILLEGAL_RATIO |
+ SCALER_INT_EN_ILLEGAL_DST_HEIGHT |
+ SCALER_INT_EN_ILLEGAL_DST_WIDTH |
+ SCALER_INT_EN_ILLEGAL_DST_V_POS |
+ SCALER_INT_EN_ILLEGAL_DST_H_POS |
+ SCALER_INT_EN_ILLEGAL_DST_C_SPAN |
+ SCALER_INT_EN_ILLEGAL_DST_Y_SPAN |
+ SCALER_INT_EN_ILLEGAL_DST_CR_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_CB_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_Y_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_COLOR |
+ SCALER_INT_EN_ILLEGAL_SRC_HEIGHT |
+ SCALER_INT_EN_ILLEGAL_SRC_WIDTH |
+ SCALER_INT_EN_ILLEGAL_SRC_CV_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_CH_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_YV_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_YH_POS |
+ SCALER_INT_EN_ILLEGAL_DST_SPAN |
+ SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN |
+ SCALER_INT_EN_ILLEGAL_SRC_CR_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_CB_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_Y_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_COLOR |
+ SCALER_INT_EN_FRAME_END;
+ scaler_write(val, SCALER_INT_EN);
+}
+
+static inline void scaler_set_src_fmt(struct scaler_context *scaler,
+ u32 src_fmt)
+{
+ u32 val;
+
+ val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt);
+ scaler_write(val, SCALER_SRC_CFG);
+}
+
+static inline void scaler_set_src_base(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *src_buf)
+{
+ static unsigned int bases[] = {
+ SCALER_SRC_Y_BASE,
+ SCALER_SRC_CB_BASE,
+ SCALER_SRC_CR_BASE,
+ };
+ int i;
+
+ for (i = 0; i < src_buf->format->num_planes; ++i)
+ scaler_write(src_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_src_span(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *src_buf)
+{
+ u32 val;
+
+ val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] /
+ src_buf->format->cpp[0]);
+
+ if (src_buf->format->num_planes > 1)
+ val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]);
+
+ scaler_write(val, SCALER_SRC_SPAN);
+}
+
+static inline void scaler_set_src_luma_pos(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *src_pos)
+{
+ u32 val;
+
+ val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2);
+ val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2);
+ scaler_write(val, SCALER_SRC_Y_POS);
+ scaler_write(val, SCALER_SRC_C_POS); /* ATTENTION! luma position value reused for the chroma plane */
+}
+
+static inline void scaler_set_src_wh(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *src_pos)
+{
+ u32 val;
+
+ val = SCALER_SRC_WH_SET_WIDTH(src_pos->w);
+ val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h);
+ scaler_write(val, SCALER_SRC_WH);
+}
+
+static inline void scaler_set_dst_fmt(struct scaler_context *scaler,
+ u32 dst_fmt)
+{
+ u32 val;
+
+ val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt);
+ scaler_write(val, SCALER_DST_CFG);
+}
+
+static inline void scaler_set_dst_base(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *dst_buf)
+{
+ static unsigned int bases[] = {
+ SCALER_DST_Y_BASE,
+ SCALER_DST_CB_BASE,
+ SCALER_DST_CR_BASE,
+ };
+ int i;
+
+ for (i = 0; i < dst_buf->format->num_planes; ++i)
+ scaler_write(dst_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_dst_span(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *dst_buf)
+{
+ u32 val;
+
+ val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] /
+ dst_buf->format->cpp[0]);
+
+ if (dst_buf->format->num_planes > 1)
+ val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]);
+
+ scaler_write(val, SCALER_DST_SPAN);
+}
+
+static inline void scaler_set_dst_luma_pos(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val;
+
+ val = SCALER_DST_WH_SET_WIDTH(dst_pos->w);
+ val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h);
+ scaler_write(val, SCALER_DST_WH);
+}
+
+static inline void scaler_set_dst_wh(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val;
+
+ val = SCALER_DST_POS_SET_H_POS(dst_pos->x);
+ val |= SCALER_DST_POS_SET_V_POS(dst_pos->y);
+ scaler_write(val, SCALER_DST_POS);
+}
+
+static inline void scaler_set_hv_ratio(struct scaler_context *scaler,
+ unsigned int rotation,
+ struct drm_exynos_ipp_task_rect *src_pos,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val, h_ratio, v_ratio;
+
+ if (drm_rotation_90_or_270(rotation)) {
+ h_ratio = (src_pos->h << 16) / dst_pos->w;
+ v_ratio = (src_pos->w << 16) / dst_pos->h;
+ } else {
+ h_ratio = (src_pos->w << 16) / dst_pos->w;
+ v_ratio = (src_pos->h << 16) / dst_pos->h;
+ }
+
+ val = SCALER_H_RATIO_SET(h_ratio);
+ scaler_write(val, SCALER_H_RATIO);
+
+ val = SCALER_V_RATIO_SET(v_ratio);
+ scaler_write(val, SCALER_V_RATIO);
+}
+
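+/*
+ * A worked example with illustrative numbers (not from the source): the
+ * ratios are 16.16 fixed point, so scaling a 1920-pixel-wide source down to
+ * a 960-pixel-wide destination gives h_ratio = (1920 << 16) / 960 = 0x20000,
+ * i.e. 2.0.
+ */
+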
+static inline void scaler_set_rotation(struct scaler_context *scaler,
+ unsigned int rotation)
+{
+ u32 val = 0;
+
+ if (rotation & DRM_MODE_ROTATE_90)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90);
+ else if (rotation & DRM_MODE_ROTATE_180)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180);
+ else if (rotation & DRM_MODE_ROTATE_270)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270);
+ if (rotation & DRM_MODE_REFLECT_X)
+ val |= SCALER_ROT_CFG_FLIP_X_EN;
+ if (rotation & DRM_MODE_REFLECT_Y)
+ val |= SCALER_ROT_CFG_FLIP_Y_EN;
+ scaler_write(val, SCALER_ROT_CFG);
+}
+
+static inline void scaler_set_csc(struct scaler_context *scaler,
+ const struct drm_format_info *fmt)
+{
+ static const u32 csc_mtx[2][3][3] = {
+ { /* YCbCr to RGB */
+ {0x254, 0x000, 0x331},
+ {0x254, 0xf38, 0xe60},
+ {0x254, 0x409, 0x000},
+ },
+ { /* RGB to YCbCr */
+ {0x084, 0x102, 0x032},
+ {0xfb4, 0xf6b, 0x0e1},
+ {0x0e1, 0xf44, 0xfdc},
+ },
+ };
+ int i, j, dir;
+
+ switch (fmt->format) {
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_RGBA8888:
+ dir = 1;
+ break;
+ default:
+ dir = 0;
+ }
+
+ for (i = 0; i < 3; i++)
+ for (j = 0; j < 3; j++)
+ scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i));
+}
+
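+/*
+ * The coefficients above look like BT.601 limited-range values in a
+ * fixed-point format where 512 represents 1.0 (an assumption, not stated in
+ * the source): 0x254 = 596 and 596 / 512 ~= 1.164; 0x331 = 817 and
+ * 817 / 512 ~= 1.596.
+ */
+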
+static inline void scaler_set_timer(struct scaler_context *scaler,
+ unsigned int timer, unsigned int divider)
+{
+ u32 val;
+
+ val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE;
+ val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer);
+ val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider);
+ scaler_write(val, SCALER_TIMEOUT_CTRL);
+}
+
+static inline void scaler_start_hw(struct scaler_context *scaler)
+{
+ scaler_write(SCALER_CFG_START_CMD, SCALER_CFG);
+}
+
+static int scaler_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct scaler_context *scaler =
+ container_of(ipp, struct scaler_context, ipp);
+
+ u32 src_fmt = scaler_get_format(task->src.buf.fourcc);
+ struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect;
+
+ u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc);
+ struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect;
+
+ scaler->task = task;
+
+ pm_runtime_get_sync(scaler->dev);
+
+ scaler_set_src_fmt(scaler, src_fmt);
+ scaler_set_src_base(scaler, &task->src);
+ scaler_set_src_span(scaler, &task->src);
+ scaler_set_src_luma_pos(scaler, src_pos);
+ scaler_set_src_wh(scaler, src_pos);
+
+ scaler_set_dst_fmt(scaler, dst_fmt);
+ scaler_set_dst_base(scaler, &task->dst);
+ scaler_set_dst_span(scaler, &task->dst);
+ scaler_set_dst_luma_pos(scaler, dst_pos);
+ scaler_set_dst_wh(scaler, dst_pos);
+
+ scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos);
+ scaler_set_rotation(scaler, task->transform.rotation);
+
+ scaler_set_csc(scaler, task->src.format);
+
+ scaler_set_timer(scaler, 0xffff, 0xf);
+
+ scaler_enable_int(scaler);
+ scaler_start_hw(scaler);
+
+ return 0;
+}
+
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = scaler_commit,
+};
+
+static inline void scaler_disable_int(struct scaler_context *scaler)
+{
+ scaler_write(0, SCALER_INT_EN);
+}
+
+static inline u32 scaler_get_int_status(struct scaler_context *scaler)
+{
+ return scaler_read(SCALER_INT_STATUS);
+}
+
+static inline int scaler_task_done(u32 val)
+{
+ return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL;
+}
+
+static irqreturn_t scaler_irq_handler(int irq, void *arg)
+{
+ struct scaler_context *scaler = arg;
+
+ u32 val = scaler_get_int_status(scaler);
+
+ scaler_disable_int(scaler);
+
+ if (scaler->task) {
+ struct exynos_drm_ipp_task *task = scaler->task;
+
+ scaler->task = NULL;
+ pm_runtime_mark_last_busy(scaler->dev);
+ pm_runtime_put_autosuspend(scaler->dev);
+ exynos_drm_ipp_task_done(task, scaler_task_done(val));
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int scaler_bind(struct device *dev, struct device *master, void *data)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+ scaler->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ scaler->scaler_data->formats,
+ scaler->scaler_data->num_formats, "scaler");
+
+ dev_info(dev, "The exynos scaler has been probed successfully\n");
+
+ return 0;
+}
+
+static void scaler_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(scaler->drm_dev, scaler->dev);
+}
+
+static const struct component_ops scaler_component_ops = {
+ .bind = scaler_bind,
+ .unbind = scaler_unbind,
+};
+
+static int scaler_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *regs_res;
+ struct scaler_context *scaler;
+ int irq;
+ int ret, i;
+
+ scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL);
+ if (!scaler)
+ return -ENOMEM;
+
+ scaler->scaler_data =
+ (struct scaler_data *)of_device_get_match_data(dev);
+
+ scaler->dev = dev;
+ regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ scaler->regs = devm_ioremap_resource(dev, regs_res);
+ if (IS_ERR(scaler->regs))
+ return PTR_ERR(scaler->regs);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(dev, "failed to get irq\n");
+ return irq;
+ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler,
+ IRQF_ONESHOT, "drm_scaler", scaler);
+ if (ret < 0) {
+ dev_err(dev, "failed to request irq\n");
+ return ret;
+ }
+
+ for (i = 0; i < scaler->scaler_data->num_clk; ++i) {
+ scaler->clock[i] = devm_clk_get(dev,
+ scaler->scaler_data->clk_name[i]);
+ if (IS_ERR(scaler->clock[i])) {
+ dev_err(dev, "failed to get clock\n");
+ return PTR_ERR(scaler->clock[i]);
+ }
+ }
+
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY);
+ pm_runtime_enable(dev);
+ platform_set_drvdata(pdev, scaler);
+
+ ret = component_add(dev, &scaler_component_ops);
+ if (ret)
+ goto err_ippdrv_register;
+
+ return 0;
+
+err_ippdrv_register:
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
+ return ret;
+}
+
+static int scaler_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ component_del(dev, &scaler_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int clk_disable_unprepare_wrapper(struct clk *clk)
+{
+ clk_disable_unprepare(clk);
+
+ return 0;
+}
+
+static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable)
+{
+ int (*clk_fun)(struct clk *clk), i;
+
+ clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper;
+
+ for (i = 0; i < scaler->scaler_data->num_clk; ++i)
+ clk_fun(scaler->clock[i]);
+
+ return 0;
+}
+
+static int scaler_runtime_suspend(struct device *dev)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+
+ return scaler_clk_ctrl(scaler, false);
+}
+
+static int scaler_runtime_resume(struct device *dev)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+
+ return scaler_clk_ctrl(scaler, true);
+}
+#endif
+
+static const struct dev_pm_ops scaler_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL)
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct exynos_drm_ipp_formats exynos5420_formats[] = {
+ /* SCALER_YUV420_2P_UV */
+ { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV420_2P_VU */
+ { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV420_3P */
+ { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV422_1P_YUYV */
+ { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_1P_UYVY */
+ { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_1P_YVYU */
+ { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_2P_UV */
+ { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_2P_VU */
+ { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_3P */
+ { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV444_2P_UV */
+ { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_YUV444_2P_VU */
+ { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_YUV444_3P */
+ { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGB_565 */
+ { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB1555 */
+ { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB1555 */
+ { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB4444 */
+ { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB4444 */
+ { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB8888 */
+ { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB8888 */
+ { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGBA8888 */
+ { IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGBA8888 */
+ { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) },
+};
+
+static const struct scaler_data exynos5420_data = {
+ .clk_name = {"mscl"},
+ .num_clk = 1,
+ .formats = exynos5420_formats,
+ .num_formats = ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct scaler_data exynos5433_data = {
+ .clk_name = {"pclk", "aclk", "aclk_xiu"},
+ .num_clk = 3,
+ .formats = exynos5420_formats, /* intentionally reuses the 5420 format list */
+ .num_formats = ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct of_device_id exynos_scaler_match[] = {
+ {
+ .compatible = "samsung,exynos5420-scaler",
+ .data = &exynos5420_data,
+ }, {
+ .compatible = "samsung,exynos5433-scaler",
+ .data = &exynos5433_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, exynos_scaler_match);
+
+struct platform_driver scaler_driver = {
+ .probe = scaler_probe,
+ .remove = scaler_remove,
+ .driver = {
+ .name = "exynos-scaler",
+ .owner = THIS_MODULE,
+ .pm = &scaler_pm_ops,
+ .of_match_table = exynos_scaler_match,
+ },
+};
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index abd84cbcf1c2..09c4bc0b1859 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
drm_mode_connector_attach_encoder(connector, encoder);
if (hdata->bridge) {
- encoder->bridge = hdata->bridge;
- hdata->bridge->encoder = encoder;
ret = drm_bridge_attach(encoder, hdata->bridge, NULL);
if (ret)
DRM_ERROR("Failed to attach bridge\n");
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 257299ec95c4..272c79f5f5bf 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
chroma_addr[1] = chroma_addr[0] + 0x40;
} else {
luma_addr[1] = luma_addr[0] + fb->pitches[0];
- chroma_addr[1] = chroma_addr[0] + fb->pitches[0];
+ chroma_addr[1] = chroma_addr[0] + fb->pitches[1];
}
} else {
luma_addr[1] = 0;
@@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
spin_lock_irqsave(&ctx->reg_slock, flags);
+ vp_reg_write(ctx, VP_SHADOW_UPDATE, 1);
/* interlace or progressive scan mode */
val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0);
vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP);
@@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx,
vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) |
VP_IMG_VSIZE(fb->height));
/* chroma plane for NV12/NV21 is half the height of the luma plane */
- vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) |
+ vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) |
VP_IMG_VSIZE(fb->height / 2));
vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w);
- vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
vp_reg_write(ctx, VP_SRC_H_POSITION,
VP_SRC_H_POSITION_VAL(state->src.x));
- vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
-
vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w);
vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x);
+
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2);
} else {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y);
}
@@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg)
/* interlace scan need to check shadow register */
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) &&
+ vp_reg_read(ctx, VP_SHADOW_UPDATE))
+ goto out;
+
+ base = mixer_reg_read(ctx, MXR_CFG);
+ shadow = mixer_reg_read(ctx, MXR_CFG_S);
+ if (base != shadow)
+ goto out;
+
base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0));
shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0));
if (base != shadow)
diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h
index c311f571bdf9..189cfa2470a8 100644
--- a/drivers/gpu/drm/exynos/regs-mixer.h
+++ b/drivers/gpu/drm/exynos/regs-mixer.h
@@ -47,6 +47,7 @@
#define MXR_MO 0x0304
#define MXR_RESOLUTION 0x0310
+#define MXR_CFG_S 0x2004
#define MXR_GRAPHIC0_BASE_S 0x2024
#define MXR_GRAPHIC1_BASE_S 0x2044
diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h
new file mode 100644
index 000000000000..fc7ccad75e74
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-scaler.h
@@ -0,0 +1,426 @@
+/* drivers/gpu/drm/exynos/regs-scaler.h
+ *
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * Register definition file for Samsung scaler driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_SCALER_H
+#define EXYNOS_REGS_SCALER_H
+
+/* Register part */
+
+/* Global setting */
+#define SCALER_STATUS 0x0 /* no shadow */
+#define SCALER_CFG 0x4
+
+/* Interrupt */
+#define SCALER_INT_EN 0x8 /* no shadow */
+#define SCALER_INT_STATUS 0xc /* no shadow */
+
+/* SRC */
+#define SCALER_SRC_CFG 0x10
+#define SCALER_SRC_Y_BASE 0x14
+#define SCALER_SRC_CB_BASE 0x18
+#define SCALER_SRC_CR_BASE 0x294
+#define SCALER_SRC_SPAN 0x1c
+#define SCALER_SRC_Y_POS 0x20
+#define SCALER_SRC_WH 0x24
+#define SCALER_SRC_C_POS 0x28
+
+/* DST */
+#define SCALER_DST_CFG 0x30
+#define SCALER_DST_Y_BASE 0x34
+#define SCALER_DST_CB_BASE 0x38
+#define SCALER_DST_CR_BASE 0x298
+#define SCALER_DST_SPAN 0x3c
+#define SCALER_DST_WH 0x40
+#define SCALER_DST_POS 0x44
+
+/* Ratio */
+#define SCALER_H_RATIO 0x50
+#define SCALER_V_RATIO 0x54
+
+/* Rotation */
+#define SCALER_ROT_CFG 0x58
+
+/* Coefficient */
+/*
+ * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D}
+ *
+ * A B C D A B C D
+ * 0 60 64 68 6c 140 144 148 14c
+ * 1 70 74 78 7c 150 154 158 15c
+ * 2 80 84 88 8c 160 164 168 16c
+ * 3 90 94 98 9c 170 174 178 17c
+ * 4 a0 a4 a8 ac 180 184 188 18c
+ * 5 b0 b4 b8 bc 190 194 198 19c
+ * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac
+ * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc
+ * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc
+ *
+ *
+ * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B}
+ *
+ * A B A B
+ * 0 f0 f4 1d0 1d4
+ * 1 f8 fc 1d8 1dc
+ * 2 100 104 1e0 1e4
+ * 3 108 10c 1e8 1ec
+ * 4 110 114 1f0 1f4
+ * 5 118 11c 1f8 1fc
+ * 6 120 124 200 204
+ * 7 128 12c 208 20c
+ * 8 130 134 210 214
+ */
+#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4)
+#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4)
+
+#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c)))
+#define SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c)))
+
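+/*
+ * Quick sanity check of the offsets above: SCALER_YHCOEF(2, 3) expands to
+ * 0x60 + 2 * 0x10 + 3 * 0x4 = 0x8c, which matches row 2, column D in the
+ * table.
+ */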
+
+/* Color Space Conversion */
+#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4)
+
+/* Dithering */
+#define SCALER_DITH_CFG 0x250
+
+/* Version Number */
+#define SCALER_VER 0x260 /* no shadow */
+
+/* Cycle count and Timeout */
+#define SCALER_CYCLE_COUNT 0x278 /* no shadow */
+#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */
+#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */
+
+/* Blending */
+#define SCALER_SRC_BLEND_COLOR 0x280
+#define SCALER_SRC_BLEND_ALPHA 0x284
+#define SCALER_DST_BLEND_COLOR 0x288
+#define SCALER_DST_BLEND_ALPHA 0x28c
+
+/* Color Fill */
+#define SCALER_FILL_COLOR 0x290
+
+/* Multiple Command Queue */
+#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */
+#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */
+#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */
+
+/* CRC */
+#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */
+#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */
+#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */
+#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */
+
+/* Shadow Registers */
+#define SCALER_SHADOW_OFFSET 0x1000
+
+
+/* Bit definition part */
+#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1)
+#define SCALER_GET(reg, hi_b, lo_b) \
+ (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b))
+#define SCALER_SET(val, hi_b, lo_b) \
+ (((val) & SCALER_MASK(hi_b, lo_b)) << lo_b)
+
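+/*
+ * For example, SCALER_SRC_WH_SET_WIDTH(1920), defined further below, expands
+ * to (1920 & SCALER_MASK(29, 16)) << 16 = (1920 & 0x3fff) << 16, i.e. the
+ * width packed into bits 29:16 of SCALER_SRC_WH.
+ */
+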
+/* SCALER_STATUS */
+#define SCALER_STATUS_SCALER_RUNNING (1 << 1)
+#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0)
+
+/* SCALER_CFG */
+#define SCALER_CFG_FILL_EN (1 << 24)
+#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17)
+#define SCALER_CFG_BLEND_EN (1 << 16)
+#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10)
+#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9)
+#define SCALER_CFG_16_BURST_MODE (1 << 8)
+#define SCALER_CFG_SOFT_RESET (1 << 1)
+#define SCALER_CFG_START_CMD (1 << 0)
+
+/* SCALER_INT_EN */
+#define SCALER_INT_EN_TIMEOUT (1 << 31)
+#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24)
+#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23)
+#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22)
+#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21)
+#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20)
+#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19)
+#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17)
+#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16)
+#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14)
+#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13)
+#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12)
+#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11)
+#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10)
+#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9)
+#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8)
+#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7)
+#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5)
+#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4)
+#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2)
+#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1)
+#define SCALER_INT_EN_FRAME_END (1 << 0)
+
+/* SCALER_INT_STATUS */
+#define SCALER_INT_STATUS_TIMEOUT (1 << 31)
+#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24)
+#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23)
+#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 22)
+#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21)
+#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20)
+#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19)
+#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14)
+#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7)
+#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1)
+#define SCALER_INT_STATUS_FRAME_END (1 << 0)
+
+/* SCALER_SRC_CFG */
+#define SCALER_SRC_CFG_TILE_EN (1 << 10)
+#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5)
+#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5)
+#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0)
+#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0)
+#define SCALER_YUV420_2P_UV 0
+#define SCALER_YUV422_2P_UV 2
+#define SCALER_YUV444_2P_UV 3
+#define SCALER_RGB_565 4
+#define SCALER_ARGB1555 5
+#define SCALER_ARGB8888 6
+#define SCALER_ARGB8888_PRE 7
+#define SCALER_YUV422_1P_YVYU 9
+#define SCALER_YUV422_1P_YUYV 10
+#define SCALER_YUV422_1P_UYVY 11
+#define SCALER_ARGB4444 12
+#define SCALER_L8A8 13
+#define SCALER_RGBA8888 14
+#define SCALER_L8 15
+#define SCALER_YUV420_2P_VU 16
+#define SCALER_YUV422_2P_VU 18
+#define SCALER_YUV444_2P_VU 19
+#define SCALER_YUV420_3P 20
+#define SCALER_YUV422_3P 22
+#define SCALER_YUV444_3P 23
+
+/* SCALER_SRC_SPAN */
+#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16)
+#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16)
+#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0)
+#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_Y_POS */
+#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16)
+#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16)
+#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0)
+#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0)
+
+/* SCALER_SRC_WH */
+#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16)
+#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16)
+#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0)
+#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_C_POS */
+#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16)
+#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16)
+#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0)
+#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0)
+
+/* SCALER_DST_CFG */
+#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5)
+#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5)
+#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0)
+#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0)
+
+/* SCALER_DST_SPAN */
+#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_WH */
+#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_POS */
+#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_H_RATIO */
+#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0)
+#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0)
+
+/* SCALER_V_RATIO */
+#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0)
+#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0)
+
+/* SCALER_ROT_CFG */
+#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3)
+#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2)
+#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0)
+#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0)
+#define SCALER_ROT_MODE_90 1
+#define SCALER_ROT_MODE_180 2
+#define SCALER_ROT_MODE_270 3
+
+/* SCALER_HCOEF, SCALER_VCOEF */
+#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2))
+#define SCALER_COEF_GET(r, i) \
+ (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff)
+#define SCALER_COEF_SET(v, i) \
+ (((v) & 0x1ff) << SCALER_COEF_SHIFT(i))
+
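+/*
+ * Two 9-bit coefficients share one register: an even index i gives a shift of
+ * 16 * (1 - 0) = 16 (bits 24:16), an odd index gives 16 * (1 - 1) = 0
+ * (bits 8:0).
+ */
+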
+/* SCALER_CSC_COEFxy */
+#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0)
+#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0)
+
+/* SCALER_DITH_CFG */
+#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6)
+#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6)
+#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3)
+#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3)
+#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0)
+#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0)
+
+/* SCALER_TIMEOUT_CTRL */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16)
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4)
+#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0)
+
+/* SCALER_TIMEOUT_CNT */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16)
+
+/* SCALER_SRC_BLEND_COLOR */
+#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31)
+#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28)
+#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_SRC_BLEND_ALPHA */
+#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31)
+#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28)
+#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_COLOR */
+#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31)
+#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28)
+#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_ALPHA */
+#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31)
+#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28)
+#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_FILL_COLOR */
+#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24)
+#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_ADDR_Q_CONFIG */
+#define SCALER_ADDR_Q_CONFIG_RST (1 << 0)
+
+/* SCALER_SRC_ADDR_Q_STATUS */
+#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23)
+#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0)
+
+/* SCALER_DST_ADDR_Q_STATUS */
+#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23)
+#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22)
+#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16)
+#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15)
+#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8)
+#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7)
+#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0)
+
+/* SCALER_CRC_COLOR00_10 */
+#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR20_30 */
+#define SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR01_11 */
+#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR21_31 */
+#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0)
+
+#endif /* EXYNOS_REGS_SCALER_H */
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 41e6c75a7f3c..f9550ea46c26 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,6 +35,7 @@
*/
#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
+MODULE_FIRMWARE(I915_CSR_GLK);
#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4)
#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig
index 294de4549922..119ec0a21de2 100644
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -11,6 +11,7 @@ config DRM_MEDIATEK
select DRM_PANEL
select MEMORY
select MTK_SMI
+ select VIDEOMODE_HELPERS
help
Choose this option if you have a Mediatek SoC.
The module will be called mediatek-drm.
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index e80a603e5fb0..6c0ea39d5739 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -22,6 +22,7 @@
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/clk.h>
+#include <video/videomode.h>
#include "mtk_dpi_regs.h"
#include "mtk_drm_ddp_comp.h"
@@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
struct mtk_dpi_sync_param vsync_leven = { 0 };
struct mtk_dpi_sync_param vsync_rodd = { 0 };
struct mtk_dpi_sync_param vsync_reven = { 0 };
- unsigned long pix_rate;
+ struct videomode vm = { 0 };
unsigned long pll_rate;
unsigned int factor;
/* keep pll_rate within the valid range of tvdpll (1G~2GHz) */
- pix_rate = 1000UL * mode->clock;
+
if (mode->clock <= 27000)
- factor = 16 * 3;
+ factor = 3 << 4;
else if (mode->clock <= 84000)
- factor = 8 * 3;
+ factor = 3 << 3;
else if (mode->clock <= 167000)
- factor = 4 * 3;
+ factor = 3 << 2;
else
- factor = 2 * 3;
- pll_rate = pix_rate * factor;
+ factor = 3 << 1;
+ drm_display_mode_to_videomode(mode, &vm);
+ pll_rate = vm.pixelclock * factor;
dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n",
- pll_rate, pix_rate);
+ pll_rate, vm.pixelclock);
clk_set_rate(dpi->tvd_clk, pll_rate);
pll_rate = clk_get_rate(dpi->tvd_clk);
- pix_rate = pll_rate / factor;
- clk_set_rate(dpi->pixel_clk, pix_rate);
- pix_rate = clk_get_rate(dpi->pixel_clk);
+ vm.pixelclock = pll_rate / factor;
+ clk_set_rate(dpi->pixel_clk, vm.pixelclock);
+ vm.pixelclock = clk_get_rate(dpi->pixel_clk);
dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n",
- pll_rate, pix_rate);
+ pll_rate, vm.pixelclock);
limit.c_bottom = 0x0010;
limit.c_top = 0x0FE0;
@@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING;
dpi_pol.de_pol = MTK_DPI_POLARITY_RISING;
- dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ?
+ dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ?
MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
- dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ?
+ dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ?
MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
-
- hsync.sync_width = mode->hsync_end - mode->hsync_start;
- hsync.back_porch = mode->htotal - mode->hsync_end;
- hsync.front_porch = mode->hsync_start - mode->hdisplay;
+ hsync.sync_width = vm.hsync_len;
+ hsync.back_porch = vm.hback_porch;
+ hsync.front_porch = vm.hfront_porch;
hsync.shift_half_line = false;
-
- vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start;
- vsync_lodd.back_porch = mode->vtotal - mode->vsync_end;
- vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay;
+ vsync_lodd.sync_width = vm.vsync_len;
+ vsync_lodd.back_porch = vm.vback_porch;
+ vsync_lodd.front_porch = vm.vfront_porch;
vsync_lodd.shift_half_line = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+ if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
mode->flags & DRM_MODE_FLAG_3D_MASK) {
vsync_leven = vsync_lodd;
vsync_rodd = vsync_lodd;
vsync_reven = vsync_lodd;
vsync_leven.shift_half_line = true;
vsync_reven.shift_half_line = true;
- } else if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+ } else if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
!(mode->flags & DRM_MODE_FLAG_3D_MASK)) {
vsync_leven = vsync_lodd;
vsync_leven.shift_half_line = true;
- } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) &&
+ } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) &&
mode->flags & DRM_MODE_FLAG_3D_MASK) {
vsync_rodd = vsync_lodd;
}
@@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
mtk_dpi_config_vsync_reven(dpi, &vsync_reven);
mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK));
- mtk_dpi_config_interface(dpi, !!(mode->flags &
- DRM_MODE_FLAG_INTERLACE));
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2);
+ mtk_dpi_config_interface(dpi, !!(vm.flags &
+ DISPLAY_FLAGS_INTERLACED));
+ if (vm.flags & DISPLAY_FLAGS_INTERLACED)
+ mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1);
else
- mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay);
+ mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive);
mtk_dpi_config_channel_limit(dpi, &limit);
mtk_dpi_config_bit_num(dpi, dpi->bit_num);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index f595ac816b55..259b7b0de1d2 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev,
mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size);
if (IS_ERR(mtk_gem))
- return ERR_PTR(PTR_ERR(mtk_gem));
+ return ERR_CAST(mtk_gem);
expected = sg_dma_address(sg->sgl);
for_each_sg(sg->sgl, s, sg->nents, i) {
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 7e5e24c2152a..aa0943ec32b0 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
}
/**
- * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
* htotal_time = htotal * byte_per_pixel / num_lanes
* overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
* mipi_ratio = (htotal_time + overhead_time) / htotal_time
* data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
*/
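+/*
+ * Rough illustration with assumed numbers (not from this driver): for a
+ * 148.5 MHz pixel clock, bit_per_pixel = 24, num_lanes = 4 and a
+ * hypothetical mipi_ratio of 1.1, data_rate is roughly
+ * 148500000 * 24 * 1.1 / 4 ~= 980 Mbit/s per lane.
+ */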
- pixel_clock = dsi->vm.pixelclock * 1000;
+ pixel_clock = dsi->vm.pixelclock;
htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
dsi->vm.hsync_len;
htotal_bits = htotal * bit_per_pixel;
@@ -725,16 +724,7 @@ static void mtk_dsi_encoder_mode_set(struct drm_encoder *encoder,
{
struct mtk_dsi *dsi = encoder_to_dsi(encoder);
- dsi->vm.pixelclock = adjusted->clock;
- dsi->vm.hactive = adjusted->hdisplay;
- dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end;
- dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay;
- dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start;
-
- dsi->vm.vactive = adjusted->vdisplay;
- dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end;
- dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay;
- dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start;
+ drm_display_mode_to_videomode(adjusted, &dsi->vm);
}
static void mtk_dsi_encoder_disable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index c4420538ec85..f2a0bd1e5119 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -767,7 +767,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
* Initialization
*/
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+ unsigned int hwindex)
{
static const unsigned int mmio_offsets[] = {
DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET
@@ -775,7 +776,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
struct rcar_du_device *rcdu = rgrp->dev;
struct platform_device *pdev = to_platform_device(rcdu->dev);
- struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
+ struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex];
struct drm_crtc *crtc = &rcrtc->crtc;
struct drm_plane *primary;
unsigned int irqflags;
@@ -787,7 +788,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
/* Get the CRTC clock and the optional external clock. */
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
- sprintf(clk_name, "du.%u", index);
+ sprintf(clk_name, "du.%u", hwindex);
name = clk_name;
} else {
name = NULL;
@@ -795,16 +796,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
rcrtc->clock = devm_clk_get(rcdu->dev, name);
if (IS_ERR(rcrtc->clock)) {
- dev_err(rcdu->dev, "no clock for CRTC %u\n", index);
+ dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex);
return PTR_ERR(rcrtc->clock);
}
- sprintf(clk_name, "dclkin.%u", index);
+ sprintf(clk_name, "dclkin.%u", hwindex);
clk = devm_clk_get(rcdu->dev, clk_name);
if (!IS_ERR(clk)) {
rcrtc->extclock = clk;
} else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) {
- dev_info(rcdu->dev, "can't get external clock %u\n", index);
+ dev_info(rcdu->dev, "can't get external clock %u\n", hwindex);
return -EPROBE_DEFER;
}
@@ -813,13 +814,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
spin_lock_init(&rcrtc->vblank_lock);
rcrtc->group = rgrp;
- rcrtc->mmio_offset = mmio_offsets[index];
- rcrtc->index = index;
+ rcrtc->mmio_offset = mmio_offsets[hwindex];
+ rcrtc->index = hwindex;
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane;
else
- primary = &rgrp->planes[index % 2].plane;
+ primary = &rgrp->planes[swindex % 2].plane;
ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary,
NULL, &crtc_funcs, NULL);
@@ -833,7 +834,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
/* Register the interrupt handler. */
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
- irq = platform_get_irq(pdev, index);
+ /* The IRQs are associated with the CRTC (sw) index. */
+ irq = platform_get_irq(pdev, swindex);
irqflags = 0;
} else {
irq = platform_get_irq(pdev, 0);
@@ -841,7 +843,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
}
if (irq < 0) {
- dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index);
+ dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex);
return irq;
}
@@ -849,7 +851,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
dev_name(rcdu->dev), rcrtc);
if (ret < 0) {
dev_err(rcdu->dev,
- "failed to register IRQ for CRTC %u\n", index);
+ "failed to register IRQ for CRTC %u\n", swindex);
return ret;
}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index fdc2bf99bda1..84b5e23a85b1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -80,7 +80,8 @@ enum rcar_du_output {
RCAR_DU_OUTPUT_MAX,
};
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index);
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+ unsigned int hwindex);
void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc);
void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 3917d839c04c..02aee6cb0e53 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7743 has one RGB output and one LVDS output
@@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7745 has two RGB outputs
@@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = {
static const struct rcar_du_device_info rcar_du_r8a7779_info = {
.gen = 2,
.features = 0,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7779 has two RGB outputs and one (currently unsupported)
@@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
.quirks = RCAR_DU_QUIRK_ALIGN_128B,
- .num_crtcs = 3,
+ .channels_mask = BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7790 has one RGB output, two LVDS outputs and one
@@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A779[13] has one RGB output, one LVDS output and one
@@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/* R8A7792 has two RGB outputs. */
[RCAR_DU_OUTPUT_DPAD0] = {
@@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7794 has two RGB outputs and one (currently unsupported)
@@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 4,
+ .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7795 has one RGB output, two HDMI outputs and one
@@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = {
},
},
.num_lvds = 1,
- .dpll_ch = BIT(1) | BIT(2),
+ .dpll_ch = BIT(2) | BIT(1),
};
static const struct rcar_du_device_info rcar_du_r8a7796_info = {
@@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 3,
+ .channels_mask = BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7796 has one RGB output, one LVDS output and one HDMI
@@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = {
.dpll_ch = BIT(1),
};
+static const struct rcar_du_device_info rcar_du_r8a77965_info = {
+ .gen = 3,
+ .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
+ | RCAR_DU_FEATURE_EXT_CTRL_REGS
+ | RCAR_DU_FEATURE_VSP1_SOURCE,
+ .channels_mask = BIT(3) | BIT(1) | BIT(0),
+ .routes = {
+ /*
+ * R8A77965 has one RGB output, one LVDS output and one HDMI
+ * output.
+ */
+ [RCAR_DU_OUTPUT_DPAD0] = {
+ .possible_crtcs = BIT(2),
+ .port = 0,
+ },
+ [RCAR_DU_OUTPUT_HDMI0] = {
+ .possible_crtcs = BIT(1),
+ .port = 1,
+ },
+ [RCAR_DU_OUTPUT_LVDS0] = {
+ .possible_crtcs = BIT(0),
+ .port = 2,
+ },
+ },
+ .num_lvds = 1,
+ .dpll_ch = BIT(1),
+};
+
static const struct rcar_du_device_info rcar_du_r8a77970_info = {
.gen = 3,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 1,
+ .channels_mask = BIT(0),
.routes = {
/* R8A77970 has one RGB output and one LVDS output. */
[RCAR_DU_OUTPUT_DPAD0] = {
@@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = {
{ .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info },
{ .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info },
{ .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info },
+ { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info },
{ .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info },
{ }
};
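With a per-channel bit mask, SoCs whose populated DU channels are not contiguous can be described; the r8a77965 entry above (BIT(3) | BIT(1) | BIT(0)) is exactly that case, so the CRTC count has to be derived from the population count rather than stored directly. A minimal sketch; du_num_crtcs() is a hypothetical helper:

#include <linux/bitops.h>

static unsigned int du_num_crtcs(unsigned int channels_mask)
{
	/* e.g. BIT(3) | BIT(1) | BIT(0) -> 3 CRTCs, on channels 0, 1 and 3 */
	return hweight8(channels_mask);
}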
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index 131d8e88b06c..b3a25e8e07d0 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -52,7 +52,7 @@ struct rcar_du_output_routing {
* @gen: device generation (2 or 3)
* @features: device features (RCAR_DU_FEATURE_*)
* @quirks: device quirks (RCAR_DU_QUIRK_*)
- * @num_crtcs: total number of CRTCs
+ * @channels_mask: bit mask of available DU channels
* @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*)
* @num_lvds: number of internal LVDS encoders
*/
@@ -60,7 +60,7 @@ struct rcar_du_device_info {
unsigned int gen;
unsigned int features;
unsigned int quirks;
- unsigned int num_crtcs;
+ unsigned int channels_mask;
struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX];
unsigned int num_lvds;
unsigned int dpll_ch;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index 2f37ea901873..d539cb290a35 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data)
static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp)
{
- u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP;
+ u32 defr6 = DEFR6_CODE;
- if (rgrp->num_crtcs > 1)
- defr6 |= DEFR6_ODPM22_DISP;
+ if (rgrp->channels_mask & BIT(0))
+ defr6 |= DEFR6_ODPM02_DISP;
+
+ if (rgrp->channels_mask & BIT(1))
+ defr6 |= DEFR6_ODPM12_DISP;
rcar_du_group_write(rgrp, DEFR6, defr6);
}
@@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp)
* On Gen3 VSPD routing can't be configured, but DPAD routing
* needs to be set despite having a single option available.
*/
- u32 crtc = ffs(possible_crtcs) - 1;
+ unsigned int rgb_crtc = ffs(possible_crtcs) - 1;
+ struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc];
- if (crtc / 2 == rgrp->index)
- defr8 |= DEFR8_DRGBS_DU(crtc);
+ if (crtc->index / 2 == rgrp->index)
+ defr8 |= DEFR8_DRGBS_DU(crtc->index);
}
rcar_du_group_write(rgrp, DEFR8, defr8);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
index 5e3adc6b31b5..42105aedecc8 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -25,6 +25,7 @@ struct rcar_du_device;
* @dev: the DU device
* @mmio_offset: registers offset in the device memory map
* @index: group index
+ * @channels_mask: bitmask of populated DU channels in this group
* @num_crtcs: number of CRTCs in this group (1 or 2)
* @use_count: number of users of the group (rcar_du_group_(get|put))
* @used_crtcs: number of CRTCs currently in use
@@ -39,6 +40,7 @@ struct rcar_du_group {
unsigned int mmio_offset;
unsigned int index;
+ unsigned int channels_mask;
unsigned int num_crtcs;
unsigned int use_count;
unsigned int used_crtcs;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index f4ac0f884f00..f0bc7cc0e913 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -428,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu)
struct {
struct device_node *np;
unsigned int crtcs_mask;
- } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, };
+ } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, };
unsigned int vsps_count = 0;
unsigned int cells;
unsigned int i;
@@ -507,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
struct drm_fbdev_cma *fbdev;
unsigned int num_encoders;
unsigned int num_groups;
+ unsigned int swindex;
+ unsigned int hwindex;
unsigned int i;
int ret;
@@ -520,7 +522,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
dev->mode_config.funcs = &rcar_du_mode_config_funcs;
dev->mode_config.helper_private = &rcar_du_mode_config_helper;
- rcdu->num_crtcs = rcdu->info->num_crtcs;
+ rcdu->num_crtcs = hweight8(rcdu->info->channels_mask);
ret = rcar_du_properties_init(rcdu);
if (ret < 0)
@@ -530,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
* Initialize vertical blanking interrupt handling. Start with vblank
* disabled for all CRTCs.
*/
- ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1);
+ ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1);
if (ret < 0)
return ret;
@@ -545,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
rgrp->dev = rcdu;
rgrp->mmio_offset = mmio_offsets[i];
rgrp->index = i;
- rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
+ /* Extract the channel mask for this group only. */
+ rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i))
+ & GENMASK(1, 0);
+ rgrp->num_crtcs = hweight8(rgrp->channels_mask);
/*
* If we have more than one CRTC in this group, pre-associate
@@ -572,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
}
/* Create the CRTCs. */
- for (i = 0; i < rcdu->num_crtcs; ++i) {
- struct rcar_du_group *rgrp = &rcdu->groups[i / 2];
+ for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) {
+ struct rcar_du_group *rgrp;
+
+ /* Skip unpopulated DU channels. */
+ if (!(rcdu->info->channels_mask & BIT(hwindex)))
+ continue;
+
+ rgrp = &rcdu->groups[hwindex / 2];
- ret = rcar_du_crtc_create(rgrp, i);
+ ret = rcar_du_crtc_create(rgrp, swindex++, hwindex);
if (ret < 0)
return ret;
}
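Each DU group drives two channels, so the group-local mask is just the corresponding two bits of the device mask, and the CRTC creation loop keeps separate software and hardware indices so that an unpopulated channel (such as channel 2 on r8a77965) is skipped without leaving a hole in the CRTC array. A sketch of the mask extraction alone; group_channels_mask() is a hypothetical helper mirroring the expression in the hunk above:

#include <linux/bitops.h>

static unsigned int group_channels_mask(unsigned int channels_mask,
					unsigned int group_index)
{
	/* channels_mask = 0xb (channels 0, 1, 3): group 0 -> 0x3, group 1 -> 0x2 */
	return (channels_mask >> (2 * group_index)) & GENMASK(1, 0);
}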
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c
index 68a0b82cb17e..afef69669bb4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_of.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c
@@ -18,6 +18,7 @@
#include "rcar_du_crtc.h"
#include "rcar_du_drv.h"
+#include "rcar_du_of.h"
/* -----------------------------------------------------------------------------
* Generic Overlay Handling
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
index d5bae99d3cfe..9dfd220ceda1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
@@ -187,14 +187,14 @@
#define DEFR6 0x000e8
#define DEFR6_CODE (0x7778 << 16)
-#define DEFR6_ODPM22_DSMR (0 << 10)
-#define DEFR6_ODPM22_DISP (2 << 10)
-#define DEFR6_ODPM22_CDE (3 << 10)
-#define DEFR6_ODPM22_MASK (3 << 10)
-#define DEFR6_ODPM12_DSMR (0 << 8)
-#define DEFR6_ODPM12_DISP (2 << 8)
-#define DEFR6_ODPM12_CDE (3 << 8)
-#define DEFR6_ODPM12_MASK (3 << 8)
+#define DEFR6_ODPM12_DSMR (0 << 10)
+#define DEFR6_ODPM12_DISP (2 << 10)
+#define DEFR6_ODPM12_CDE (3 << 10)
+#define DEFR6_ODPM12_MASK (3 << 10)
+#define DEFR6_ODPM02_DSMR (0 << 8)
+#define DEFR6_ODPM02_DISP (2 << 8)
+#define DEFR6_ODPM02_CDE (3 << 8)
+#define DEFR6_ODPM02_MASK (3 << 8)
#define DEFR6_TCNE1 (1 << 6)
#define DEFR6_TCNE0 (1 << 4)
#define DEFR6_MLOS1 (1 << 2)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index b3bec0125696..27a440886b17 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -17,6 +17,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_plane_helper.h>
#include <linux/bitops.h>
@@ -237,6 +238,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane,
}
}
+ ret = drm_gem_fb_prepare_fb(plane, state);
+ if (ret)
+ goto fail;
+
return 0;
fail:
@@ -299,18 +304,17 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = {
static struct drm_plane_state *
rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane)
{
- struct rcar_du_vsp_plane_state *state;
struct rcar_du_vsp_plane_state *copy;
if (WARN_ON(!plane->state))
return NULL;
- state = to_rcar_vsp_plane_state(plane->state);
- copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+ copy = kzalloc(sizeof(*copy), GFP_KERNEL);
if (copy == NULL)
return NULL;
__drm_atomic_helper_plane_duplicate_state(plane, &copy->state);
+ copy->alpha = to_rcar_vsp_plane_state(plane->state)->alpha;
return &copy->state;
}
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 1b278a22c8b7..1067e702c22c 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc)
ret = clk_set_rate(priv->clk, req_rate * clkdiv);
clk_rate = clk_get_rate(priv->clk);
- if (ret < 0) {
+ if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) {
/*
* If we fail to set the clock rate (some architectures don't
* use the common clock framework yet and may not implement
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 83d3b7912fc2..c8650bbcbcb3 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -741,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
struct vc4_async_flip_state {
struct drm_crtc *crtc;
struct drm_framebuffer *fb;
+ struct drm_framebuffer *old_fb;
struct drm_pending_vblank_event *event;
struct vc4_seqno_cb cb;
@@ -770,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
drm_crtc_vblank_put(crtc);
drm_framebuffer_put(flip_state->fb);
+
+ /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
+ * when the planes are updated through the async update path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ if (flip_state->old_fb) {
+ struct drm_gem_cma_object *cma_bo;
+ struct vc4_bo *bo;
+
+ cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+ bo = to_vc4_bo(&cma_bo->base);
+ vc4_bo_dec_usecnt(bo);
+ drm_framebuffer_put(flip_state->old_fb);
+ }
+
kfree(flip_state);
up(&vc4->async_modeset);
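The FIXME comments in this file describe a balanced pair: the new framebuffer's BO gains a use count when the asynchronous flip is queued, and the previous framebuffer's BO drops one when the flip completes, mirroring what prepare_fb()/cleanup_fb() do on the regular atomic path. A reduced sketch of that pairing; flip_queue_refs() and flip_complete_refs() are illustrative helpers, not driver functions, and the real flip machinery and error handling are omitted:

/* Queue side: take the references that the completion side will drop. */
static int flip_queue_refs(struct vc4_bo *new_bo, struct drm_framebuffer *new_fb,
			   struct vc4_async_flip_state *flip_state)
{
	int ret;

	ret = vc4_bo_inc_usecnt(new_bo);	/* mirrors prepare_fb() */
	if (ret)
		return ret;

	drm_framebuffer_get(new_fb);
	flip_state->fb = new_fb;
	return 0;
}

/* Completion side: release the old FB and its BO use count. */
static void flip_complete_refs(struct vc4_async_flip_state *flip_state,
			       struct vc4_bo *old_bo)
{
	drm_framebuffer_put(flip_state->fb);

	if (flip_state->old_fb) {
		vc4_bo_dec_usecnt(old_bo);	/* mirrors cleanup_fb() */
		drm_framebuffer_put(flip_state->old_fb);
	}
}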
@@ -794,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+ /* Increment the BO usecnt here, so that we never end up with an
+ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+ * plane is later updated through the non-async path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made prepare_fb()
+ * logic.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
- if (!flip_state)
+ if (!flip_state) {
+ vc4_bo_dec_usecnt(bo);
return -ENOMEM;
+ }
drm_framebuffer_get(fb);
flip_state->fb = fb;
@@ -807,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
drm_framebuffer_put(fb);
+ vc4_bo_dec_usecnt(bo);
kfree(flip_state);
return ret;
}
+ /* Save the current FB before it's replaced by the new one in
+ * drm_atomic_set_fb_for_plane(). We'll need the old FB in
+ * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
+ * it consistent.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ flip_state->old_fb = plane->state->fb;
+ if (flip_state->old_fb)
+ drm_framebuffer_get(flip_state->old_fb);
+
WARN_ON(drm_crtc_vblank_get(crtc) != 0);
/* Immediately update the plane's legacy fb pointer, so that later
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 2582ffd36bb5..ba0cdb743c3e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
struct drm_crtc *crtc = set->crtc;
struct drm_framebuffer *fb;
struct drm_crtc *tmp;
- struct drm_modeset_acquire_ctx *ctx;
struct drm_device *dev = set->crtc->dev;
+ struct drm_modeset_acquire_ctx ctx;
int ret;
- ctx = dev->mode_config.acquire_ctx;
+ drm_modeset_acquire_init(&ctx, 0);
restart:
/*
@@ -458,7 +458,7 @@ restart:
fb = set->fb;
- ret = crtc->funcs->set_config(set, ctx);
+ ret = crtc->funcs->set_config(set, &ctx);
if (ret == 0) {
crtc->primary->crtc = crtc;
crtc->primary->fb = fb;
@@ -473,20 +473,13 @@ restart:
}
if (ret == -EDEADLK) {
- dev->mode_config.acquire_ctx = NULL;
-
-retry_locking:
- drm_modeset_backoff(ctx);
-
- ret = drm_modeset_lock_all_ctx(dev, ctx);
- if (ret)
- goto retry_locking;
-
- dev->mode_config.acquire_ctx = ctx;
-
+ drm_modeset_backoff(&ctx);
goto restart;
}
+ drm_modeset_drop_locks(&ctx);
+ drm_modeset_acquire_fini(&ctx);
+
return ret;
}
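The rewrite above replaces borrowing dev->mode_config.acquire_ctx with a locally owned acquire context and the standard backoff-and-retry dance on -EDEADLK. A sketch of that pattern in isolation; set_config_with_local_ctx() is a hypothetical wrapper, while the drm_modeset_* calls are the regular DRM locking API:

#include <drm/drm_crtc.h>
#include <drm/drm_modeset_lock.h>

static int set_config_with_local_ctx(struct drm_mode_set *set)
{
	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
retry:
	ret = set->crtc->funcs->set_config(set, &ctx);
	if (ret == -EDEADLK) {
		drm_modeset_backoff(&ctx);	/* drop and re-wait for the contended lock */
		goto retry;
	}

	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);
	return ret;
}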
@@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info)
}
mutex_lock(&par->bo_mutex);
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_fb_kms_framebuffer(info);
if (ret)
goto out_unlock;
@@ -657,7 +649,6 @@ out_unlock:
drm_mode_destroy(vmw_priv->dev, old_mode);
par->set_mode = mode;
- drm_modeset_unlock_all(vmw_priv->dev);
mutex_unlock(&par->bo_mutex);
return ret;
@@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
par->max_width = fb_width;
par->max_height = fb_height;
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width,
par->max_height, &par->con,
&par->crtc, &init_mode);
- if (ret) {
- drm_modeset_unlock_all(vmw_priv->dev);
+ if (ret)
goto err_kms;
- }
info->var.xres = init_mode->hdisplay;
info->var.yres = init_mode->vdisplay;
- drm_modeset_unlock_all(vmw_priv->dev);
/*
* Create buffers and alloc memory
@@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
cancel_delayed_work_sync(&par->local_work);
unregister_framebuffer(info);
+ mutex_lock(&par->bo_mutex);
(void) vmw_fb_kms_detach(par, true, true);
+ mutex_unlock(&par->bo_mutex);
vfree(par->vmalloc);
framebuffer_release(info);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 6728c6247b4b..01f2dc9e6f52 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
out_fence, NULL);
+ vmw_dmabuf_unreference(&ctx->buf);
vmw_resource_unreserve(res, false, NULL, 0);
mutex_unlock(&res->dev_priv->cmdbuf_mutex);
}
@@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
struct vmw_display_unit *du;
struct drm_display_mode *mode;
int i = 0;
+ int ret = 0;
+ mutex_lock(&dev_priv->dev->mode_config.mutex);
list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list,
head) {
if (i == unit)
@@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (i != unit) {
DRM_ERROR("Could not find initial display unit.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
if (list_empty(&con->modes))
@@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (list_empty(&con->modes)) {
DRM_ERROR("Could not find initial display mode.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
du = vmw_connector_to_du(con);
@@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
head);
}
- return 0;
+ out_unlock:
+ mutex_unlock(&dev_priv->dev->mode_config.mutex);
+
+ return ret;
}
/**