aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2024-10-06 03:59:22 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2024-10-06 03:59:22 -0400
commitc8d430db8eec7d4fd13a6bea27b7086a54eda6da (patch)
tree3c9b35bc9372232183e745cc2a03995a8d053ff6 /drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
parent2a5fe5a01668e831af1de3951718fbf88b9a9b9c (diff)
parenta1d402abf8e3ff1d821e88993fc5331784fac0da (diff)
Merge tag 'kvmarm-fixes-6.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 6.12, take #1 - Fix pKVM error path on init, making sure we do not change critical system registers as we're about to fail - Make sure that the host's vector length is at capped by a value common to all CPUs - Fix kvm_has_feat*() handling of "negative" features, as the current code is pretty broken - Promote Joey to the status of official reviewer, while James steps down -- hopefully only temporarly
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c278
1 files changed, 111 insertions, 167 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 8d75061f9f38..9d4f5352a62c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -52,6 +52,42 @@
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -97,6 +133,8 @@ static int vcn_v4_0_5_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5);
+ uint32_t *ptr;
r = amdgpu_vcn_sw_init(adev);
if (r)
@@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
return 0;
}
@@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle)
r = amdgpu_vcn_sw_fini(adev);
+ kfree(adev->vcn.ip_dump);
+
return r;
}
@@ -1347,170 +1395,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job)
-{
- struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
-
- /* if VCN0 is harvested, we can't support AV1 */
- if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
- return -EINVAL;
-
- scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
- [AMDGPU_RING_PRIO_0].sched;
- drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
- return 0;
-}
-
-static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
- uint64_t addr)
-{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_va_mapping *map;
- uint32_t *msg, num_buffers;
- struct amdgpu_bo *bo;
- uint64_t start, end;
- unsigned int i;
- void *ptr;
- int r;
-
- addr &= AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
- if (r) {
- DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
- return r;
- }
-
- start = map->start * AMDGPU_GPU_PAGE_SIZE;
- end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
- if (addr & 0x7) {
- DRM_ERROR("VCN messages must be 8 byte aligned!\n");
- return -EINVAL;
- }
-
- bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r) {
- DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, &ptr);
- if (r) {
- DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
- return r;
- }
-
- msg = ptr + addr - start;
-
- /* Check length */
- if (msg[1] > end - addr) {
- r = -EINVAL;
- goto out;
- }
-
- if (msg[3] != RDECODE_MSG_CREATE)
- goto out;
-
- num_buffers = msg[2];
- for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
- uint32_t offset, size, *create;
-
- if (msg[0] != RDECODE_MESSAGE_CREATE)
- continue;
-
- offset = msg[1];
- size = msg[2];
-
- if (offset + size > end) {
- r = -EINVAL;
- goto out;
- }
-
- create = ptr + addr + offset - start;
-
- /* H264, HEVC and VP9 can run on any instance */
- if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
- continue;
-
- r = vcn_v4_0_5_limit_sched(p, job);
- if (r)
- goto out;
- }
-
-out:
- amdgpu_bo_kunmap(bo);
- return r;
-}
-
-#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
-#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
-
-#define RADEON_VCN_ENGINE_INFO (0x30000001)
-#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
-
-#define RENCODE_ENCODE_STANDARD_AV1 2
-#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
-#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
-
-/* return the offset in ib if id is found, -1 otherwise
- * to speed up the searching we only search upto max_offset
- */
-static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
-{
- int i;
-
- for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
- if (ib->ptr[i + 1] == id)
- return i;
- }
- return -1;
-}
-
-static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
-{
- struct amdgpu_ring *ring = amdgpu_job_ring(job);
- struct amdgpu_vcn_decode_buffer *decode_buffer;
- uint64_t addr;
- uint32_t val;
- int idx;
-
- /* The first instance can decode anything */
- if (!ring->me)
- return 0;
-
- /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
- idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
- RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
- if (idx < 0) /* engine info is missing */
- return 0;
-
- val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
-
- if (!(decode_buffer->valid_buf_flag & 0x1))
- return 0;
-
- addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo;
- return vcn_v4_0_5_dec_msg(p, job, addr);
- } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
- idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
- RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
- if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
- return vcn_v4_0_5_limit_sched(p, job);
- }
- return 0;
-}
-
static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1518,7 +1402,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
- .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
@@ -1733,6 +1616,67 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
}
}
+static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5);
+ uint32_t inst_off, is_powered;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
+
+static void vcn_v4_0_5_dump_ip_state(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, j;
+ bool is_powered;
+ uint32_t inst_off;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5);
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is first element of the array */
+ adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS);
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered)
+ for (j = 1; j < reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j],
+ i));
+ }
+}
+
static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
.name = "vcn_v4_0_5",
.early_init = vcn_v4_0_5_early_init,
@@ -1751,8 +1695,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_5_set_clockgating_state,
.set_powergating_state = vcn_v4_0_5_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = vcn_v4_0_5_dump_ip_state,
+ .print_ip_state = vcn_v4_0_5_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = {