aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-03 17:02:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-03 17:02:26 -0700
commit906dde0f355bd97c080c215811ae7db1137c4af8 (patch)
tree1e1b4ba58a59c4027f06d86e7430566ee0dcbb15 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
parent69c0067aa3f40d3e52ab78643aecb17d669d3389 (diff)
parent7846b12fe0b5feab5446d892f41b5140c1419109 (diff)
Merge tag 'drm-for-v4.14' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main drm pull request for 4.14 merge window. I'm sending this early, as my continuing journey into fatherhood is occurring really soon now, I'm going to be mostly useless for the next couple of weeks, though I may be able to read email, I doubt I'll be doing much patch applications or git sending. If anything urgent pops up I've asked Daniel/Jani/Alex/Sean to try and direct stuff towards you. Outside drm changes: Some rcar-du updates that touch the V4L tree, all acks should be in place. It adds one export to the radix tree code for new i915 use case. There are some minor AGP cleanups (don't see that too often). Changes to the vbox driver in staging to avoid breaking compilation. Summary: core: - Atomic helper fixes - Atomic UAPI fixes - Add YCBCR 4:2:0 support - Drop set_busid hook - Refactor fb_helper locking - Remove a bunch of internal APIs - Add a bunch of better default handlers - Format modifier/blob plane property added - More internal header refactoring - Make more internal API names consistent - Enhanced syncobj APIs (wait/signal/reset/create signalled) bridge: - Add Synopsys Designware MIPI DSI host bridge driver tiny: - Add Pervasive Displays RePaper displays - Add support for LEGO MINDSTORMS EV3 LCD i915: - Lots of GEN10/CNL support patches - drm syncobj support - Skylake+ watermark refactoring - GVT vGPU 48-bit ppgtt support - GVT performance improvements - NOA change ioctl - CCS (color compression) scanout support - GPU reset improvements amdgpu: - Initial hugepage support - BO migration logic rework - Vega10 improvements - Powerplay fixes - Stop reprogramming the MC - Fixes for ACP audio on stoney - SR-IOV fixes/improvements - Command submission overhead improvements amdkfd: - Non-dGPU upstreaming patches - Scratch VA ioctl - Image tiling modes - Update PM4 headers for new firmware - Drop all BUG_ONs. nouveau: - GP108 modesetting support. - Disable MSI on big endian. vmwgfx: - Add fence fd support. msm: - Runtime PM improvements exynos: - NV12MT support - Refactor KMS drivers imx-drm: - Lock scanout channel to improve memory bw - Cleanups etnaviv: - GEM object population fixes tegra: - Prep work for Tegra186 support - PRIME mmap support sunxi: - HDMI support improvements - HDMI CEC support omapdrm: - HDMI hotplug IRQ support - Big driver cleanup - OMAP5 DSI support rcar-du: - vblank fixes - VSP1 updates arcgpu: - Minor fixes stm: - Add STM32 DSI controller driver dw_hdmi: - Add support for Rockchip RK3399 - HDMI CEC support atmel-hlcdc: - Add 8-bit color support vc4: - Atomic fixes - New ioctl to attach a label to a buffer object - HDMI CEC support - Allow userspace to dictate rendering order on submit ioctl" * tag 'drm-for-v4.14' of git://people.freedesktop.org/~airlied/linux: (1074 commits) drm/syncobj: Add a signal ioctl (v3) drm/syncobj: Add a reset ioctl (v3) drm/syncobj: Add a syncobj_array_find helper drm/syncobj: Allow wait for submit and signal behavior (v5) drm/syncobj: Add a CREATE_SIGNALED flag drm/syncobj: Add a callback mechanism for replace_fence (v3) drm/syncobj: add sync obj wait interface. (v8) i915: Use drm_syncobj_fence_get drm/syncobj: Add a race-free drm_syncobj_fence_get helper (v2) drm/syncobj: Rename fence_get to find_fence drm: kirin: Add mode_valid logic to avoid mode clocks we can't generate drm/vmwgfx: Bump the version for fence FD support drm/vmwgfx: Add export fence to file descriptor support drm/vmwgfx: Add support for imported Fence File Descriptor drm/vmwgfx: Prepare to support fence fd drm/vmwgfx: Fix incorrect command header offset at restart drm/vmwgfx: Support the NOP_ERROR command drm/vmwgfx: Restart command buffers after errors drm/vmwgfx: Move irq bottom half processing to threads drm/vmwgfx: Don't use drm_irq_[un]install ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c227
1 files changed, 198 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 133d06671e46..fb6e5dbd5a03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
#include "vi_structs.h"
#include "vid.h"
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
struct cik_sdma_rlc_registers;
/*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr);
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -111,12 +147,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
- .get_fw_version = get_fw_version
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
return (struct kfd2kgd_calls *)&kfd2kgd;
+ return (struct kfd2kgd_calls *)&kfd2kgd;
}
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -147,7 +186,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -216,7 +255,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
uint32_t mec;
uint32_t pipe;
- mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
@@ -244,20 +283,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
}
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr)
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
{
- struct vi_mqd *m;
- uint32_t shadow_wptr, valid_wptr;
struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct vi_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
m = get_mqd(mqd);
- valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
- if (valid_wptr == 0)
- m->cp_hqd_pq_wptr = shadow_wptr;
-
acquire_queue(kgd, pipe_id, queue_id);
- gfx_v8_0_mqd_commit(adev, mqd);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+ * This is safe since EOP RPTR==WPTR for any inactive HQD
+ * on ASICs that do not support context-save.
+ * EOP writes/reads can start anywhere in the ring.
+ */
+ if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+ }
+
+ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (read_user_wptr(mm, wptr, wptr_val))
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
release_queue(kgd);
return 0;
@@ -308,29 +394,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
return false;
}
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t temp;
- int timeout = utimeout;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+ struct vi_mqd *m = get_mqd(mqd);
acquire_queue(kgd, pipe_id, queue_id);
- WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(mmCP_HQD_ACTIVE);
- if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
- if (timeout <= 0) {
- pr_err("kfd: cp queue preemption time out.\n");
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
release_queue(kgd);
return -ETIME;
}
- msleep(20);
- timeout -= 20;
+ usleep_range(500, 1000);
}
release_queue(kgd);
@@ -444,6 +603,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
return 0;
}
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(kgd);
+}
+
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -454,42 +623,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw->data;
break;
case KGD_ENGINE_ME:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.me_fw->data;
+ adev->gfx.me_fw->data;
break;
case KGD_ENGINE_CE:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw->data;
break;
case KGD_ENGINE_MEC1:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw->data;
break;
case KGD_ENGINE_MEC2:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw->data;
break;
case KGD_ENGINE_RLC:
hdr = (const union amdgpu_firmware_header *)
- adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw->data;
break;
case KGD_ENGINE_SDMA1:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[0].fw->data;
+ adev->sdma.instance[0].fw->data;
break;
case KGD_ENGINE_SDMA2:
hdr = (const union amdgpu_firmware_header *)
- adev->sdma.instance[1].fw->data;
+ adev->sdma.instance[1].fw->data;
break;
default: