aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/Kconfig1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c3
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c2
-rw-r--r--drivers/gpu/drm/amd/include/discovery.h42
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h3
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h10
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c3
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h14
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c21
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h18
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h52
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c108
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c2
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c7
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c5
-rw-r--r--drivers/gpu/drm/ast/ast_drv.h1
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c29
-rw-r--r--drivers/gpu/drm/drm_atomic_uapi.c14
-rw-r--r--drivers/gpu/drm/drm_bridge_connector.c8
-rw-r--r--drivers/gpu/drm/drm_buddy.c25
-rw-r--r--drivers/gpu/drm/drm_client.c2
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c5
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c11
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c18
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c12
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c40
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c6
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_regs.h2
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.c3
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c55
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c10
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c33
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c7
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h3
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h14
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c20
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c2
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.c19
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c30
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_prime.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/firmware.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/fw.c6
-rw-r--r--drivers/gpu/drm/omapdrm/Kconfig1
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c2
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c4
-rw-r--r--drivers/gpu/drm/tests/drm_gem_shmem_test.c11
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h4
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c64
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c121
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_submit.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmw_surface_cache.h10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c114
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c140
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h18
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h44
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c17
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c62
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c502
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h17
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c14
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c32
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c27
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c33
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c186
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c282
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c40
-rw-r--r--drivers/gpu/drm/xe/Makefile2
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c28
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c3
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h9
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c6
-rw-r--r--drivers/gpu/drm/xe/xe_device.c91
-rw-r--r--drivers/gpu/drm/xe/xe_device.h4
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c5
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c34
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h9
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c55
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c18
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c11
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c201
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h12
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c10
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c8
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c9
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c5
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c15
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c28
-rw-r--r--drivers/gpu/drm/xe/xe_observation.c1
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c11
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c11
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c3
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c26
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c3
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c14
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h2
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c59
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c18
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules1
171 files changed, 2864 insertions, 1288 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index fd0749c0c630..6b2c6b91f962 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -268,6 +268,7 @@ config DRM_EXEC
config DRM_GPUVM
tristate
depends on DRM
+ select DRM_EXEC
help
GPU-VM representation providing helpers to manage a GPUs virtual
address space
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 916b6b8cf7d9..6dfdff58bffd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
r = amdgpu_ring_parse_cs(ring, p, job, ib);
if (r)
return r;
+
+ if (ib->sa_bo)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
@@ -1778,7 +1781,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va_mapping *mapping;
- int r;
+ int i, r;
addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1793,13 +1796,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
- if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
- (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
- r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
- if (r)
- return r;
- }
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+ if (r)
+ return r;
return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 5cb33ac99f70..c43d1b6e5d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_QUERY_STATE2:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ac108fca64fe..4bd61c169ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
msg = RREG32(mmMP0_SMN_C2PMSG_33);
if (msg & 0x80000000)
break;
- usleep_range(1000, 1100);
+ msleep(1);
}
}
@@ -1500,6 +1500,7 @@ union gc_info {
struct gc_info_v1_0 v1;
struct gc_info_v1_1 v1_1;
struct gc_info_v1_2 v1_2;
+ struct gc_info_v1_3 v1_3;
struct gc_info_v2_0 v2;
struct gc_info_v2_1 v2_1;
};
@@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
}
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) {
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size);
+ adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size);
+ adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size);
+ adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
+ }
break;
case 2:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 82452606ae6c..c770cb201e64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
int i, r = 0;
int j;
+ if (adev->enable_mes) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ return 0;
+ }
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
int i, r = 0;
int j;
+ if (adev->enable_mes) {
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+ return 0;
+ }
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
if (amdgpu_device_skip_hw_access(adev))
return 0;
- if (adev->mes.ring.sched.ready)
+ if (adev->mes.ring[0].sched.ready)
return amdgpu_mes_rreg(adev, reg);
BUG_ON(!ring->funcs->emit_rreg);
@@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
if (amdgpu_device_skip_hw_access(adev))
return;
- if (adev->mes.ring.sched.ready) {
+ if (adev->mes.ring[0].sched.ready) {
amdgpu_mes_wreg(adev, reg, v);
return;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ddda94e49db4..56cc58edbb4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -240,6 +240,12 @@ struct amdgpu_gfx_config {
uint32_t gc_tcp_size_per_cu;
uint32_t gc_num_cu_per_sqc;
uint32_t gc_tcc_size;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_cache_line_size;
};
struct amdgpu_cu_info {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..b49b3650fd62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
ring = adev->rings[i];
vmhub = ring->vm_hub;
- if (ring == &adev->mes.ring ||
+ if (ring == &adev->mes.ring[0] ||
+ ring == &adev->mes.ring[1] ||
ring == &adev->umsch_mm.ring)
continue;
@@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
unsigned long flags;
uint32_t seq;
- if (adev->mes.ring.sched.ready) {
+ if (adev->mes.ring[0].sched.ready) {
amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
ref, mask);
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index febca3130497..4d951a1baefa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {
uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+ /* get the DCC buffer alignment */
+ unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
enum amdgpu_memory_partition (*query_mem_partition_mode)(
struct amdgpu_device *adev);
@@ -363,6 +365,10 @@ struct amdgpu_gmc {
(adev)->gmc.gmc_funcs->override_vm_pte_flags \
((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
+ typeof(adev) _adev = (adev); \
+ _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
+})
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e238f2832f65..908e13455152 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
struct dma_fence *fence = NULL;
int r;
- /* Ignore soft recovered fences here */
r = drm_sched_entity_error(s_entity);
- if (r && r != -ENODATA)
+ if (r)
goto error;
if (!fence && job->gang_submit)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index e499d6ba306b..1cb1ec7beefe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
if (!amdgpu_mes_log_enable)
return 0;
- r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
@@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
return r;
}
- memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+ memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);
return 0;
@@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- spin_lock_init(&adev->mes.ring_lock);
mutex_init(&adev->mes.mutex_hidden);
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ spin_lock_init(&adev->mes.ring_lock[i]);
+
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
adev->mes.vmid_mask_gfxhub = 0xffffff00;
@@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
- r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) ring trail_fence_offs wb alloc failed\n", r);
- goto error_ids;
- }
- adev->mes.sch_ctx_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
- adev->mes.sch_ctx_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.sch_ctx_gpu_addr[i] =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+ adev->mes.sch_ctx_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
- r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
- if (r) {
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- dev_err(adev->dev,
- "(%d) query_status_fence_offs wb alloc failed\n", r);
- goto error_ids;
+ r = amdgpu_device_wb_get(adev,
+ &adev->mes.query_status_fence_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+ (adev->mes.query_status_fence_offs[i] * 4);
+ adev->mes.query_status_fence_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
}
- adev->mes.query_status_fence_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
- adev->mes.query_status_fence_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
if (r) {
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
dev_err(adev->dev,
"(%d) read_val_offs alloc failed\n", r);
- goto error_ids;
+ goto error;
}
adev->mes.read_val_gpu_addr =
adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
@@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error_doorbell:
amdgpu_mes_doorbell_free(adev);
error:
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
-error_ids:
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+ if (adev->mes.read_val_ptr)
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
@@ -226,13 +236,22 @@ error_ids:
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
+ int i;
+
amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
&adev->mes.event_log_cpu_addr);
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+ if (adev->mes.read_val_ptr)
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+
amdgpu_mes_doorbell_free(adev);
idr_destroy(&adev->mes.pasid_idr);
@@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
sizeof(ucode_prefix));
- if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) {
+ if (adev->enable_uni_mes) {
snprintf(fw_name, sizeof(fw_name),
"amdgpu/%s_uni_mes.bin", ucode_prefix);
} else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
@@ -1573,7 +1592,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
- mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
+ mem, adev->mes.event_log_size, false);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index e11051271f71..0bc837dab578 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level {
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
-#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
struct amdgpu_mes_funcs;
@@ -83,8 +82,8 @@ struct amdgpu_mes {
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
- struct amdgpu_ring ring;
- spinlock_t ring_lock;
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
@@ -113,12 +112,12 @@ struct amdgpu_mes {
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
- uint32_t sch_ctx_offs;
- uint64_t sch_ctx_gpu_addr;
- uint64_t *sch_ctx_ptr;
- uint32_t query_status_fence_offs;
- uint64_t query_status_fence_gpu_addr;
- uint64_t *query_status_fence_ptr;
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
uint32_t read_val_offs;
uint64_t read_val_gpu_addr;
uint32_t *read_val_ptr;
@@ -135,8 +134,9 @@ struct amdgpu_mes {
unsigned long *doorbell_bitmap;
/* MES event log buffer */
- struct amdgpu_bo *event_log_gpu_obj;
- uint64_t event_log_gpu_addr;
+ uint32_t event_log_size;
+ struct amdgpu_bo *event_log_gpu_obj;
+ uint64_t event_log_gpu_addr;
void *event_log_cpu_addr;
/* ip specific functions */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 0c856005df6b..38face981c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t
if (ret)
return -EFAULT;
+ if (ta_bin_len > PSP_1_MEG)
+ return -EINVAL;
+
copy_pos += sizeof(uint32_t);
ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index ad49cecb20b8..e6344a6b0a9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+ sched_hw_submission = 8;
else if (ring == &adev->sdma.instance[0].page)
sched_hw_submission = 256;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 1a5439abd1a0..c87d68d4be53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared {
struct amdgpu_fw_shared_unified_queue_struct sq;
uint8_t pad1[8];
struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
struct amdgpu_fw_shared_rb_setup rb_setup;
- uint8_t pad2[4];
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[9];
};
#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 111c380f929b..b287a82e6177 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev)
adev->gfx.is_poweron = false;
}
- adev->mes.ring.sched.ready = false;
+ adev->mes.ring[0].sched.ready = false;
}
bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index f91cc149d06c..7d26a962f811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
+ unsigned int adjust_dcc_size = 0;
struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block;
unsigned long pages_per_block;
@@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
+ adev->gmc.gmc_funcs->get_dcc_alignment)
+ adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
+
remaining_size = (u64)vres->base.size;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ unsigned int dcc_size;
+
+ dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
+ remaining_size = (u64)dcc_size;
+
+ vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ }
mutex_lock(&mgr->lock);
while (remaining_size) {
@@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
min_block_size = mgr->default_page_size;
size = remaining_size;
- if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
- !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
+ min_block_size = size;
+ else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
BUG_ON(min_block_size < mm->chunk_size);
@@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
}
mutex_unlock(&mgr->lock);
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ struct drm_buddy_block *dcc_block;
+ unsigned long dcc_start;
+ u64 trim_start;
+
+ dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ /* Adjust the start address for DCC buffers only */
+ dcc_start =
+ roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
+ adjust_dcc_size);
+ trim_start = (u64)dcc_start;
+ drm_buddy_block_trim(mm, &trim_start,
+ (u64)vres->base.size,
+ &vres->blocks);
+ }
+
vres->base.start = 0;
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
vres->base.size);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 2957702fca0c..e444e621ddaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
+ char fw_name[53];
char ucode_prefix[30];
const char *wks = "";
int err;
@@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
- "amdgpu/%s_rlc.bin", ucode_prefix);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index f384be0d1800..e45d23e82878 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -2999,7 +3005,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
@@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}
+static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->rev_id == 0)
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+ break;
+ default:
+ break;
+ }
+}
+
static int gfx_v12_0_hw_init(void *handle)
{
int r;
@@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle)
}
}
+ if (!amdgpu_emu_mode)
+ gfx_v12_0_init_golden_registers(adev);
+
adev->gfx.is_poweron = true;
if (get_gb_addr_config(adev))
@@ -3519,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle)
return r;
}
-static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
- struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i, r = 0;
-
- if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
- return -EINVAL;
-
- if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_gfx_rings))
- return -ENOMEM;
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
- PREEMPT_QUEUES, 0, 0);
-
- if (adev->gfx.kiq[0].ring.sched.ready)
- r = amdgpu_ring_test_helper(kiq_ring);
-
- return r;
-}
-
static int gfx_v12_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
uint32_t tmp;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
@@ -3553,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle)
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
- r = gfx_v12_0_kiq_disable_kgq(adev);
- if (r)
+ if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index b88a6fa173b3..2797fd84432b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
*/
- if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid, GET_INST(GC, 0));
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index fd3ac483760e..edcb5351f8cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
*/
- if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
return 0;
}
+static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
+{
+ unsigned int max_tex_channel_caches, alignment;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1))
+ return 0;
+
+ max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;
+ if (is_power_of_2(max_tex_channel_caches))
+ alignment = (unsigned int)(max_tex_channel_caches / SZ_4);
+ else
+ alignment = roundup_pow_of_two(max_tex_channel_caches);
+
+ return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K);
+}
+
static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
@@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
.get_vm_pde = gmc_v12_0_get_vm_pde,
.get_vm_pte = gmc_v12_0_get_vm_pte,
.get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
+ .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
};
static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 99adf3625657..98aa3ccd0d20 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ad524ddc9760..6ae5a784e187 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v4_0_3.h"
@@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
adev->jpeg.ras = &jpeg_v4_0_3_ras;
}
+
+/**
+ * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ uint32_t i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
index 747a3e5f6856..71c54b294e15 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -46,6 +46,9 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
@@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
-
+int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index d694a276498a..f4daff90c770 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
+ .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 8ce51b9236c1..2ea8223eb969 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long timeout = 3000000; /* 3000 ms */
struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
+ struct amdgpu_ring *ring = &mes->ring[0];
struct MES_API_STATUS *api_status;
union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str;
unsigned long flags;
u64 status_gpu_addr;
- u32 status_offset;
+ u32 seq, status_offset;
u64 *status_ptr;
signed long r;
int ret;
@@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
status_ptr = (u64 *)&adev->wb.wb[status_offset];
*status_ptr = 0;
- spin_lock_irqsave(&mes->ring_lock, flags);
+ spin_lock_irqsave(&mes->ring_lock[0], flags);
r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
if (r)
goto error_unlock_free;
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ goto error_undo;
+
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1;
@@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
ring->fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++ring->fence_drv.sync_seq;
+ mes_status_pkt.api_status.api_completion_fence_value = seq;
amdgpu_ring_write_multiple(ring, &mes_status_pkt,
sizeof(mes_status_pkt) / 4);
amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&mes->ring_lock, flags);
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
op_str = mes_v11_0_get_op_string(x_pkt);
misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
@@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
dev_dbg(adev->dev, "MES msg=%d was emitted\n",
x_pkt->header.opcode);
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, timeout);
if (r < 1 || !*status_ptr) {
if (misc_op_str)
@@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_device_wb_free(adev, status_offset);
return 0;
+error_undo:
+ dev_err(adev->dev, "MES ring buffer is full.\n");
+ amdgpu_ring_undo(ring);
+
error_unlock_free:
- spin_unlock_irqrestore(&mes->ring_lock, flags);
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
error_wb_free:
amdgpu_device_wb_free(adev, status_offset);
@@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
+ mes->query_status_fence_gpu_addr[0];
for (i = 0; i < MAX_COMPUTE_PIPES; i++)
mes_set_hw_res_pkt.compute_hqd_mask[i] =
@@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
return r;
}
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
return amdgpu_ring_test_helper(kiq_ring);
}
@@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
if (pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
else
BUG();
@@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
ring->funcs = &mes_v11_0_ring_funcs;
@@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
if (pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
else
BUG();
@@ -1163,6 +1173,8 @@ static int mes_v11_0_sw_init(void *handle)
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;
+
r = amdgpu_mes_init(adev);
if (r)
return r;
@@ -1198,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe;
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
kfree(adev->mes.mqd_backup[pipe]);
@@ -1214,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle)
&adev->gfx.kiq[0].ring.mqd_gpu_addr,
&adev->gfx.kiq[0].ring.mqd_ptr);
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
+ amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
+ &adev->mes.ring[0].mqd_gpu_addr,
+ &adev->mes.ring[0].mqd_ptr);
amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
- amdgpu_ring_fini(&adev->mes.ring);
+ amdgpu_ring_fini(&adev->mes.ring[0]);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
@@ -1330,9 +1339,9 @@ failure:
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
{
- if (adev->mes.ring.sched.ready) {
- mes_v11_0_kiq_dequeue(&adev->mes.ring);
- adev->mes.ring.sched.ready = false;
+ if (adev->mes.ring[0].sched.ready) {
+ mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
+ adev->mes.ring[0].sched.ready = false;
}
if (amdgpu_sriov_vf(adev)) {
@@ -1350,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->mes.ring.sched.ready)
+ if (adev->mes.ring[0].sched.ready)
goto out;
if (!adev->enable_mes_kiq) {
@@ -1395,7 +1404,7 @@ out:
* with MES enabled.
*/
adev->gfx.kiq[0].ring.sched.ready = false;
- adev->mes.ring.sched.ready = true;
+ adev->mes.ring[0].sched.ready = true;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index c9f74231ad59..e39a58d262c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
}
static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size,
- int api_status_off)
+ int pipe, void *pkt, int size,
+ int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long timeout = 3000000; /* 3000 ms */
struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
+ struct amdgpu_ring *ring = &mes->ring[pipe];
+ spinlock_t *ring_lock = &mes->ring_lock[pipe];
struct MES_API_STATUS *api_status;
union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str;
unsigned long flags;
u64 status_gpu_addr;
- u32 status_offset;
+ u32 seq, status_offset;
u64 *status_ptr;
signed long r;
int ret;
@@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
status_ptr = (u64 *)&adev->wb.wb[status_offset];
*status_ptr = 0;
- spin_lock_irqsave(&mes->ring_lock, flags);
+ spin_lock_irqsave(ring_lock, flags);
r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
if (r)
goto error_unlock_free;
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ goto error_undo;
+
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1;
@@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
ring->fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++ring->fence_drv.sync_seq;
+ mes_status_pkt.api_status.api_completion_fence_value = seq;
amdgpu_ring_write_multiple(ring, &mes_status_pkt,
sizeof(mes_status_pkt) / 4);
amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&mes->ring_lock, flags);
+ spin_unlock_irqrestore(ring_lock, flags);
op_str = mes_v12_0_get_op_string(x_pkt);
misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
if (misc_op_str)
- dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
- misc_op_str);
+ dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
+ pipe, op_str, misc_op_str);
else if (op_str)
- dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
+ pipe, op_str);
else
- dev_dbg(adev->dev, "MES msg=%d was emitted\n",
- x_pkt->header.opcode);
+ dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
+ pipe, x_pkt->header.opcode);
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, timeout);
if (r < 1 || !*status_ptr) {
if (misc_op_str)
- dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
- op_str, misc_op_str);
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
+ pipe, op_str, misc_op_str);
else if (op_str)
- dev_err(adev->dev, "MES failed to respond to msg=%s\n",
- op_str);
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
+ pipe, op_str);
else
- dev_err(adev->dev, "MES failed to respond to msg=%d\n",
- x_pkt->header.opcode);
+ dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
+ pipe, x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
@@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
amdgpu_device_wb_free(adev, status_offset);
return 0;
+error_undo:
+ dev_err(adev->dev, "MES ring buffer is full.\n");
+ amdgpu_ring_undo(ring);
+
error_unlock_free:
- spin_unlock_irqrestore(&mes->ring_lock, flags);
+ spin_unlock_irqrestore(ring_lock, flags);
error_wb_free:
amdgpu_device_wb_free(adev, status_offset);
@@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type)
return MES_QUEUE_TYPE_COMPUTE;
else if (queue_type == AMDGPU_RING_TYPE_SDMA)
return MES_QUEUE_TYPE_SDMA;
+ else if (queue_type == AMDGPU_RING_TYPE_MES)
+ return MES_QUEUE_TYPE_SCHQ;
else
BUG();
return -1;
@@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gds_size = input->queue_size;
return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ AMDGPU_MES_SCHED_PIPE,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
@@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ AMDGPU_MES_SCHED_PIPE,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
@@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
struct mes_map_legacy_queue_input *input)
{
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ int pipe;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
@@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.map_legacy_kq = 1;
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
@@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ int pipe;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
@@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
convert_to_mes_queue_type(input->queue_type);
}
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
@@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
return 0;
}
-static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes)
+static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
@@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes)
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_status_pkt, sizeof(mes_status_pkt),
offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}
@@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
struct mes_misc_op_input *input)
{
union MESAPI__MISC misc_pkt;
+ int pipe;
memset(&misc_pkt, 0, sizeof(misc_pkt));
@@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
return -EINVAL;
}
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&misc_pkt, sizeof(misc_pkt),
offsetof(union MESAPI__MISC, api_status));
}
-static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes)
+static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
{
union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
@@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes)
mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}
-static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
+static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
{
int i;
struct amdgpu_device *adev = mes->adev;
@@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
- mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
- mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
- mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
- mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
-
- for (i = 0; i < MAX_COMPUTE_PIPES; i++)
- mes_set_hw_res_pkt.compute_hqd_mask[i] =
- mes->compute_hqd_mask[i];
-
- for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
-
- for (i = 0; i < MAX_SDMA_PIPES; i++)
- mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] =
+ mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+ }
- for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.aggregated_doorbells[i] =
- mes->aggregated_doorbells[i];
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
+ mes->sch_ctx_gpu_addr[pipe];
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr[pipe];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
@@ -551,10 +591,12 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.oversubscription_timer = 50;
mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
- mes_set_hw_res_pkt.enable_mes_event_int_logging = 0;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ }
- return mes_v12_0_submit_pkt_and_poll_completion(mes,
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
@@ -732,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
if (enable) {
data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
- (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
mutex_lock(&adev->srbm_mutex);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if ((!adev->enable_mes_kiq || adev->enable_uni_mes) &&
- pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
-
soc21_grbm_select(adev, 3, pipe, 0, 0);
ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
@@ -755,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
/* unhalt MES and activate pipe0 */
data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
- (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
if (amdgpu_emu_mode)
@@ -772,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
data = REG_SET_FIELD(data, CP_MES_CNTL,
MES_INVALIDATE_ICACHE, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
- (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
}
@@ -788,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
mutex_lock(&adev->srbm_mutex);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if ((!adev->enable_mes_kiq || adev->enable_uni_mes) &&
- pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
-
/* me=3, queue=0 */
soc21_grbm_select(adev, 3, pipe, 0, 0);
@@ -1083,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
return r;
}
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
@@ -1099,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring;
int r;
- if (pipe == AMDGPU_MES_KIQ_PIPE)
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
- else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
else
- BUG();
+ ring = &adev->mes.ring[pipe];
- if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
+ if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
(amdgpu_in_reset(adev) || adev->in_suspend)) {
*(ring->wptr_cpu_addr) = 0;
*(ring->rptr_cpu_addr) = 0;
@@ -1118,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
return r;
if (pipe == AMDGPU_MES_SCHED_PIPE) {
- if (adev->enable_uni_mes) {
- mes_v12_0_queue_init_register(ring);
- } else {
+ if (adev->enable_uni_mes)
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ else
r = mes_v12_0_kiq_enable_queue(adev);
- if (r)
- return r;
- }
+ if (r)
+ return r;
} else {
mes_v12_0_queue_init_register(ring);
}
@@ -1144,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
return 0;
}
-static int mes_v12_0_ring_init(struct amdgpu_device *adev)
+static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
{
struct amdgpu_ring *ring;
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[pipe];
ring->funcs = &mes_v12_0_ring_funcs;
ring->me = 3;
- ring->pipe = 0;
+ ring->pipe = pipe;
ring->queue = 0;
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
- ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
ring->no_scheduler = true;
sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+
return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
}
@@ -1176,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
ring = &adev->gfx.kiq[0].ring;
ring->me = 3;
- ring->pipe = adev->enable_uni_mes ? 0 : 1;
+ ring->pipe = 1;
ring->queue = 0;
ring->adev = NULL;
@@ -1198,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
int r, mqd_size = sizeof(struct v12_compute_mqd);
struct amdgpu_ring *ring;
- if (pipe == AMDGPU_MES_KIQ_PIPE)
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
- else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
else
- BUG();
+ ring = &adev->mes.ring[pipe];
if (ring->mqd_obj)
return 0;
@@ -1237,14 +1267,13 @@ static int mes_v12_0_sw_init(void *handle)
adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;
+
r = amdgpu_mes_init(adev);
if (r)
return r;
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
-
r = mes_v12_0_allocate_eop_buf(adev, pipe);
if (r)
return r;
@@ -1252,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle)
r = mes_v12_0_mqd_sw_init(adev, pipe);
if (r)
return r;
- }
- if (adev->enable_mes_kiq) {
- r = mes_v12_0_kiq_ring_init(adev);
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ r = mes_v12_0_kiq_ring_init(adev);
+ else
+ r = mes_v12_0_ring_init(adev, pipe);
if (r)
return r;
}
- r = mes_v12_0_ring_init(adev);
- if (r)
- return r;
-
return 0;
}
@@ -1272,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe;
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
kfree(adev->mes.mqd_backup[pipe]);
@@ -1282,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle)
&adev->mes.eop_gpu_addr[pipe],
NULL);
amdgpu_ucode_release(&adev->mes.fw[pipe]);
- }
- amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
- &adev->gfx.kiq[0].ring.mqd_gpu_addr,
- &adev->gfx.kiq[0].ring.mqd_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
+ if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
+ amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
+ &adev->mes.ring[pipe].mqd_gpu_addr,
+ &adev->mes.ring[pipe].mqd_ptr);
+ amdgpu_ring_fini(&adev->mes.ring[pipe]);
+ }
+ }
- amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
- amdgpu_ring_fini(&adev->mes.ring);
+ if (!adev->enable_uni_mes) {
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ }
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
@@ -1337,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- adev->mes.ring.sched.ready = false;
+ adev->mes.ring[0].sched.ready = false;
}
static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
@@ -1358,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
{
int r = 0;
- mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
-
if (adev->enable_uni_mes)
- return mes_v12_0_hw_init(adev);
+ mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
+ else
+ mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1388,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
if (r)
goto failure;
+ if (adev->enable_uni_mes) {
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ }
+
r = mes_v12_0_hw_init(adev);
if (r)
goto failure;
@@ -1401,9 +1435,15 @@ failure:
static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
{
- if (adev->mes.ring.sched.ready) {
- mes_v12_0_kiq_dequeue_sched(adev);
- adev->mes.ring.sched.ready = false;
+ if (adev->mes.ring[0].sched.ready) {
+ if (adev->enable_uni_mes)
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
+ RESET_QUEUES, 0, 0);
+ else
+ mes_v12_0_kiq_dequeue_sched(adev);
+
+ adev->mes.ring[0].sched.ready = false;
}
mes_v12_0_enable(adev, false);
@@ -1416,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->mes.ring.sched.ready)
+ if (adev->mes.ring[0].sched.ready)
goto out;
- if (!adev->enable_mes_kiq || adev->enable_uni_mes) {
+ if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
r = mes_v12_0_load_microcode(adev,
AMDGPU_MES_SCHED_PIPE, true);
@@ -1439,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle)
mes_v12_0_enable(adev, true);
}
+ /* Enable the MES to handle doorbell ring on unmapped queue */
+ mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
if (r)
goto failure;
- r = mes_v12_0_set_hw_resources(&adev->mes);
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
if (r)
goto failure;
if (adev->enable_uni_mes)
- mes_v12_0_set_hw_resources_1(&adev->mes);
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
mes_v12_0_init_aggregated_doorbell(&adev->mes);
- /* Enable the MES to handle doorbell ring on unmapped queue */
- mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
-
- r = mes_v12_0_query_sched_status(&adev->mes);
+ r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
if (r) {
DRM_ERROR("MES is busy\n");
goto failure;
@@ -1468,7 +1508,7 @@ out:
* with MES enabled.
*/
adev->gfx.kiq[0].ring.sched.ready = false;
- adev->mes.ring.sched.ready = true;
+ adev->mes.ring[0].sched.ready = true;
return 0;
@@ -1511,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
- if (adev->enable_uni_mes) {
- r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE);
- if (!r)
- return 0;
-
- adev->enable_uni_mes = false;
- }
-
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
r = amdgpu_mes_init_microcode(adev, pipe);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
index 5bbaa2b2caab..0fbc3be81f14 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid,
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
- req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ /* Only use legacy inv on mmhub side */
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index af1e90159ce3..2e72d445415f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -176,14 +176,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- /* SDMA seems to miss doorbells sometimes when powergating kicks in.
- * Updating the wptr directly will wake it. This is only safe because
- * we disallow gfxoff in begin_use() and then allow it again in end_use().
- */
- WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) {
+ /* SDMA seems to miss doorbells sometimes when powergating kicks in.
+ * Updating the wptr directly will wake it. This is only safe because
+ * we disallow gfxoff in begin_use() and then allow it again in end_use().
+ */
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 41b5e45697dc..ecee9e7d7e4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) |
- SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags &
- (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
@@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) |
SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
+ else
+ ib->ptr[ib->length_dw++] = 0;
}
/**
@@ -1616,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
.copy_max_bytes = 0x400000,
- .copy_num_dw = 7,
+ .copy_num_dw = 8,
.emit_copy_buffer = sdma_v7_0_emit_copy_buffer,
.fill_max_bytes = 0x400000,
.fill_num_dw = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 2357ff39323f..e74e1983da53 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -76,6 +76,12 @@
((cond & 0xF) << 24) | \
((type & 0xF) << 28))
+#define CP_PACKETJ_NOP 0x60000000
+#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
+#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
+#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
+#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index 7d641d0dadba..b0c3678cfb31 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
@@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
index b7a08e7a4423..d163d92a692f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7e7929f24ae4..983a977632ff 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle)
hpd_rx_irq_work_suspend(dm);
+ if (adev->dm.dc->caps.ips_support)
+ dc_allow_idle_optimizations(adev->dm.dc, true);
+
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 5442da90f508..2e9f6da1acdc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -804,12 +804,25 @@ struct dsc_mst_fairness_params {
};
#if defined(CONFIG_DRM_AMD_DC_FP)
-static int kbps_to_peak_pbn(int kbps)
+static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link)
+{
+ u8 link_coding_cap;
+ uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B;
+
+ link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link);
+ if (link_coding_cap == DP_128b_132b_ENCODING)
+ fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B;
+
+ return fec_overhead_multiplier_x1000;
+}
+
+static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000)
{
u64 peak_kbps = kbps;
peak_kbps *= 1006;
- peak_kbps = div_u64(peak_kbps, 1000);
+ peak_kbps *= fec_overhead_multiplier_x1000;
+ peak_kbps = div_u64(peak_kbps, 1000 * 1000);
return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
}
@@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state,
int link_timeslots_used;
int fair_pbn_alloc;
int ret = 0;
+ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled) {
initial_slack[i] =
- kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn;
+ kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn;
bpp_increased[i] = false;
remaining_to_increase += 1;
} else {
@@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
int next_index;
int remaining_to_try = 0;
int ret;
+ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled
@@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
if (next_index == -1)
break;
- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps);
+ vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr,
params[next_index].port,
@@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
vars[next_index].dsc_enabled = false;
vars[next_index].bpp_x16 = 0;
} else {
- vars[next_index].pbn = kbps_to_peak_pbn(
- params[next_index].bw_range.max_kbps);
+ vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr,
params[next_index].port,
@@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
int count = 0;
int i, k, ret;
bool debugfs_overwrite = false;
+ uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
memset(params, 0, sizeof(params));
@@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
/* Try no compression */
for (i = 0; i < count; i++) {
vars[i + k].aconnector = params[i].aconnector;
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
@@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
/* Try max compression */
for (i = 0; i < count; i++) {
if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
+ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = true;
vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
@@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (ret < 0)
return ret;
} else {
- vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+ vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
vars[i + k].dsc_enabled = false;
vars[i + k].bpp_x16 = 0;
ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
@@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute(
}
}
+ if (new_stream_on_link_num == 0)
+ return false;
+
/* check current_state if there stream on link but it is not in
* new request state
*/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index fa84d34b7373..600d6e221011 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -46,6 +46,9 @@
#define SYNAPTICS_CASCADED_HUB_ID 0x5A
#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
+#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
+#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
+
enum mst_msg_ready_type {
NONE_MSG_RDY_EVENT = 0,
DOWN_REP_MSG_RDY_EVENT = 1,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index a83bd0331c3b..5cb11cc2d063 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -28,6 +28,7 @@
#include <drm/drm_blend.h>
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_plane_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fourcc.h>
#include "amdgpu.h"
@@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
-
r = amdgpu_bo_reserve(rbo, true);
if (r) {
dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index be2638c763d7..9a406d74c0dd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -35,8 +35,10 @@
#include "dc_stream_priv.h"
#define DC_LOGGER dc->ctx->logger
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#define MAX(x, y) ((x > y) ? x : y)
+#endif
/*******************************************************************************
* Private functions
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 2a21bcf5224f..4d960dc5ce89 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
else
copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0;
-
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 3c0222aa4df1..46f9c05de16e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index e06fc370267b..14a902ff3b8a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
if (hubbub && hubp) {
if (hubbub->funcs->program_det_size)
hubbub->funcs->program_det_size(hubbub, hubp->inst, 0);
+ if (hubbub->funcs->program_det_segments)
+ hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0);
}
}
@@ -3587,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = temp_x + viewport_width;
+ pos_cpy.x = 2 * viewport_width - temp_x;
}
}
} else {
@@ -3680,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
(int)hubp->curs_attr.width || pos_cpy.x
<= (int)hubp->curs_attr.width +
pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = 2 * viewport_width - temp_x;
+ pos_cpy.x = temp_x + viewport_width;
}
}
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index e4f7078c1026..f115c7a285e7 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context)
if (hubbub && hubp) {
if (hubbub->funcs->program_det_size)
hubbub->funcs->program_det_size(hubbub, hubp->inst, 0);
+ if (hubbub->funcs->program_det_segments)
+ hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 9a3cc0514a36..8e0588b1cf30 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ /* Use pipe context based otg sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
+
dc->config.dc_mode_clk_limit_support = true;
dc->config.enable_windowed_mpo_odm = true;
/* read VBIOS LTTPR caps */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
index a05a2209a44e..34b02147881d 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.min_prefetch_in_strobe_ns = 60000, // 60us
.disable_unbounded_requesting = false,
.enable_legacy_fast_update = false,
+ .dcc_meta_propagation_delay_us = 10,
.fams2_config = {
.bits = {
.enable = true,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
index 26efeada4f41..bb46f30d11d0 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
@@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context);
SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \
SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \
SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \
- HUBP_3DLUT_FL_REG_LIST_DCN401(id)
+ HUBP_3DLUT_FL_REG_LIST_DCN401(id), \
+ SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \
+ SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id)
/* ABM */
#define ABM_DCN401_REG_LIST_RI(id) \
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
index 7ecf76aea950..6e064e6ae949 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
@@ -25,7 +25,9 @@
#include "hdcp.h"
+#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
#define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/
#define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */
#define HDCP_MAX_AUX_TRANSACTION_SIZE 16
diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h
index 46bf19c9c5c4..710e328fad48 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -258,6 +258,48 @@ struct gc_info_v1_2 {
uint32_t gc_gl2c_per_gpu;
};
+struct gc_info_v1_3 {
+ struct gpu_info_header header;
+ uint32_t gc_num_se;
+ uint32_t gc_num_wgp0_per_sa;
+ uint32_t gc_num_wgp1_per_sa;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_gl2c;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_sa_per_se;
+ uint32_t gc_num_packer_per_sc;
+ uint32_t gc_num_gl2a;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_size;
+ uint32_t gc_tcc_cache_line_size;
+};
+
struct gc_info_v2_0 {
struct gpu_info_header header;
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index b72d5d362251..21ceafce1f9b 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -28,6 +28,9 @@
#define MES_API_VERSION 1
+/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000
+
/* Driver submits one API(cmd) as a single Frame and this command size is same
* for all API to ease the debugging and parsing of ring buffer.
*/
diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
index ffd67c6ed9b3..101e2fe962c6 100644
--- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
@@ -28,6 +28,9 @@
#define MES_API_VERSION 0x14
+/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000
+
/* Driver submits one API(cmd) as a single Frame and this command size is same for all API
* to ease the debugging and parsing of ring buffer.
*/
@@ -94,6 +97,7 @@ enum MES_QUEUE_TYPE {
MES_QUEUE_TYPE_SDMA,
MES_QUEUE_TYPE_MAX,
+ MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX,
};
struct MES_API_STATUS {
@@ -239,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t send_write_data : 1;
uint32_t os_tdr_timeout_override : 1;
uint32_t use_rs64mem_for_proc_gang_ctx : 1;
+ uint32_t halt_on_misaligned_access : 1;
+ uint32_t use_add_queue_unmap_flag_addr : 1;
+ uint32_t enable_mes_sch_stb_log : 1;
+ uint32_t limit_single_process : 1;
uint32_t unmapped_doorbell_handling: 2;
- uint32_t reserved : 15;
+ uint32_t reserved : 11;
};
uint32_t uint32_all;
};
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index a1b8a82d77cf..8b7d6ed7e2ed 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int r = 0;
- if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU)
+ if (!pp_funcs || !pp_funcs->load_firmware ||
+ (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU)))
return 0;
mutex_lock(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h
index 6f54c410c2f9..409aeec6baa9 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h
@@ -22,12 +22,18 @@
*/
#include <asm/div64.h>
-#define SHIFT_AMOUNT 16 /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */
+enum ppevvmath_constants {
+ /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */
+ SHIFT_AMOUNT = 16,
-#define PRECISION 5 /* Change this value to change the number of decimal places in the final output - 5 is a good default */
+ /* Change this value to change the number of decimal places in the final output - 5 is a good default */
+ PRECISION = 5,
-#define SHIFTED_2 (2 << SHIFT_AMOUNT)
-#define MAX (1 << (SHIFT_AMOUNT - 1)) - 1 /* 32767 - Might change in the future */
+ SHIFTED_2 = (2 << SHIFT_AMOUNT),
+
+ /* 32767 - Might change in the future */
+ MAX = (1 << (SHIFT_AMOUNT - 1)) - 1,
+};
/* -------------------------------------------------------------------------------
* NEW TYPE - fINT
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 9d7454b3c314..74e35f8ddefc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2224,8 +2224,9 @@ static int smu_bump_power_profile_mode(struct smu_context *smu,
}
static int smu_adjust_power_state_dynamic(struct smu_context *smu,
- enum amd_dpm_forced_level level,
- bool skip_display_settings)
+ enum amd_dpm_forced_level level,
+ bool skip_display_settings,
+ bool force_update)
{
int ret = 0;
int index = 0;
@@ -2254,7 +2255,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
}
}
- if (smu_dpm_ctx->dpm_level != level) {
+ if (force_update || smu_dpm_ctx->dpm_level != level) {
ret = smu_asic_set_performance_level(smu, level);
if (ret) {
dev_err(smu->adev->dev, "Failed to set performance level!");
@@ -2265,13 +2266,12 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
smu_dpm_ctx->dpm_level = level;
}
- if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
index = fls(smu->workload_mask);
index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
workload[0] = smu->workload_setting[index];
- if (smu->power_profile_mode != workload[0])
+ if (force_update || smu->power_profile_mode != workload[0])
smu_bump_power_profile_mode(smu, workload, 0);
}
@@ -2292,11 +2292,13 @@ static int smu_handle_task(struct smu_context *smu,
ret = smu_pre_display_config_changed(smu);
if (ret)
return ret;
- ret = smu_adjust_power_state_dynamic(smu, level, false);
+ ret = smu_adjust_power_state_dynamic(smu, level, false, false);
break;
case AMD_PP_TASK_COMPLETE_INIT:
+ ret = smu_adjust_power_state_dynamic(smu, level, true, true);
+ break;
case AMD_PP_TASK_READJUST_POWER_STATE:
- ret = smu_adjust_power_state_dynamic(smu, level, true);
+ ret = smu_adjust_power_state_dynamic(smu, level, true, false);
break;
default:
break;
@@ -2343,8 +2345,7 @@ static int smu_switch_power_profile(void *handle,
workload[0] = smu->workload_setting[index];
}
- if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
+ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
smu_bump_power_profile_mode(smu, workload, 0);
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h
index de2e442281ff..87ca5ceb1ece 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h
@@ -92,7 +92,6 @@
//Resets
#define PPSMC_MSG_PrepareMp1ForUnload 0x2E
-#define PPSMC_MSG_Mode1Reset 0x2F
//Set SystemVirtual DramAddrHigh
#define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30
@@ -119,11 +118,12 @@
//STB to dram log
#define PPSMC_MSG_DumpSTBtoDram 0x3D
-#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E
-#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F
+#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E
+#define PPSMC_MSG_DummyUndefined 0x3F
#define PPSMC_MSG_STBtoDramLogSetDramSize 0x40
#define PPSMC_MSG_SetOBMTraceBufferLogging 0x41
+#define PPSMC_MSG_UseProfilingMode 0x42
#define PPSMC_MSG_AllowGfxDcs 0x43
#define PPSMC_MSG_DisallowGfxDcs 0x44
#define PPSMC_MSG_EnableAudioStutterWA 0x45
@@ -135,6 +135,16 @@
#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B
#define PPSMC_MSG_SetPriorityDeltaGain 0x4C
#define PPSMC_MSG_AllowIHHostInterrupt 0x4D
+#define PPSMC_MSG_EnableShadowDpm 0x4E
#define PPSMC_MSG_Mode3Reset 0x4F
-#define PPSMC_Message_Count 0x50
+#define PPSMC_MSG_SetDriverDramAddr 0x50
+#define PPSMC_MSG_SetToolsDramAddr 0x51
+#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52
+#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53
+#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54
+#define PPSMC_MSG_GetSvi3Voltage 0x55
+#define PPSMC_MSG_UpdatePolicy 0x56
+#define PPSMC_MSG_ExtPwrConnSupport 0x57
+#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58
+#define PPSMC_Message_Count 0x59
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h
index 4a3fde89aed7..75c921e87360 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h
@@ -27,7 +27,8 @@
#pragma pack(push, 1)
-#define SMU_14_0_2_TABLE_FORMAT_REVISION 3
+#define SMU_14_0_2_TABLE_FORMAT_REVISION 23
+#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1
// POWERPLAYTABLE::ulPlatformCaps
#define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page.
@@ -43,6 +44,7 @@
#define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0
#define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD
+#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1
#define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00
enum SMU_14_0_2_OD_SW_FEATURE_CAP
@@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING
SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE,
SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO,
SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE,
+ SMU_14_0_2_PMSETTING_COUNT
};
#define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings
@@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table
int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings
};
+enum smu_14_0_3_pptable_source {
+ PPTABLE_SOURCE_IFWI = 0,
+ PPTABLE_SOURCE_DRIVER_HARDCODED = 1,
+ PPTABLE_SOURCE_PPGEN_REGISTRY = 2,
+ PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY,
+};
+
struct smu_14_0_2_powerplay_table
{
struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen.
uint8_t table_revision; // PPGen use only: table_revision = 3
- uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t).
+ uint8_t pptable_source; // PPGen UI dropdown box
uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t)
uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t.
- uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable.
- uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t.
- uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable.
- uint16_t pmfw_board_table_size; // The size of BoardTable_t.
+ uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table).
+ uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t.
+ uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t
+ uint16_t pmfw_board_table_size; // The size of BoardTable_t.
uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable.
uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t.
uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base
@@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table
PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes
};
+enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP {
+ SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0,
+ SMU_14_0_2_CUSTOM_ODCAP_COUNT
+};
+
+enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID {
+ SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0,
+ SMU_14_0_2_CUSTOM_ODSETTING_COUNT,
+};
+
+struct smu_14_0_2_custom_overdrive_table {
+ uint8_t revision;
+ uint8_t reserve[3];
+ uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT];
+ int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT];
+ int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT];
+ int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT];
+};
+
+struct smu_14_0_3_custom_powerplay_table {
+ uint8_t custom_table_revision;
+ uint16_t custom_table_size;
+ uint16_t custom_sku_table_offset;
+ uint32_t custom_platform_caps;
+ uint16_t software_shutdown_temp;
+ struct smu_14_0_2_custom_overdrive_table custom_overdrive_table;
+ uint32_t reserve[8];
+ CustomSkuTable_t custom_sku_table_pmfw;
+};
+
#pragma pack(pop)
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 78c3f94bb3ff..9974c9f8135e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -121,6 +121,7 @@ struct mca_ras_info {
#define P2S_TABLE_ID_A 0x50325341
#define P2S_TABLE_ID_X 0x50325358
+#define P2S_TABLE_ID_3 0x50325303
// clang-format off
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
@@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t p2s_table_id = P2S_TABLE_ID_A;
int ret = 0, i, p2stable_count;
+ int var = (adev->pdev->device & 0xF);
char ucode_prefix[15];
/* No need to load P2S tables in IOV mode */
if (amdgpu_sriov_vf(adev))
return 0;
- if (!(adev->flags & AMD_IS_APU))
+ if (!(adev->flags & AMD_IS_APU)) {
p2s_table_id = P2S_TABLE_ID_X;
+ if (var == 0x5)
+ p2s_table_id = P2S_TABLE_ID_3;
+ }
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix,
sizeof(ucode_prefix));
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index a7d0231727e8..7bc95c404377 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2378,7 +2378,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
size += sysfs_emit_at(buf, size, " ");
for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++)
- size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i],
+ size += sysfs_emit_at(buf, size, "%d %-14s%s", i, amdgpu_pp_profile_name[i],
(i == smu->power_profile_mode) ? "* " : " ");
size += sysfs_emit_at(buf, size, "\n");
@@ -2408,7 +2408,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
do { \
size += sysfs_emit_at(buf, size, "%-30s", #field); \
for (j = 0; j <= PP_SMC_POWER_PROFILE_WINDOW3D; j++) \
- size += sysfs_emit_at(buf, size, "%-16d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \
+ size += sysfs_emit_at(buf, size, "%-18d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \
size += sysfs_emit_at(buf, size, "\n"); \
} while (0)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 98ea58d792ca..0c09b8c4ff49 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -66,6 +66,7 @@
#define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000
#define DEBUGSMC_MSG_Mode1Reset 2
+#define LINK_SPEED_MAX 3
static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
@@ -114,7 +115,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0),
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0),
MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0),
- MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0),
@@ -221,7 +221,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT),
};
-#if 0
static const uint8_t smu_v14_0_2_throttler_map[] = {
[THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT),
[THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT),
@@ -241,7 +240,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = {
[THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT),
[THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT),
};
-#endif
static int
smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu,
@@ -1825,48 +1823,86 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu)
smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54);
}
-static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu,
- uint32_t size)
+static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu,
+ void **table)
{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct gpu_metrics_v1_3 *gpu_metrics =
+ (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table;
+ SmuMetricsExternal_t metrics_ext;
+ SmuMetrics_t *metrics = &metrics_ext.SmuMetrics;
int ret = 0;
- /* message SMU to update the bad page number on SMUBUS */
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_SetNumBadMemoryPagesRetired,
- size, NULL);
+ ret = smu_cmn_get_metrics_table(smu,
+ &metrics_ext,
+ true);
if (ret)
- dev_err(smu->adev->dev,
- "[%s] failed to message SMU to update bad memory pages number\n",
- __func__);
+ return ret;
- return ret;
-}
+ smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
-static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu,
- uint32_t size)
-{
- int ret = 0;
+ gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE];
+ gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT];
+ gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM];
+ gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX];
+ gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC];
+ gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0],
+ metrics->AvgTemperature[TEMP_VR_MEM1]);
- /* message SMU to update the bad channel info on SMUBUS */
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,
- size, NULL);
- if (ret)
- dev_err(smu->adev->dev,
- "[%s] failed to message SMU to update bad memory pages channel info\n",
- __func__);
+ gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
+ gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
+ metrics->Vcn1ActivityPercentage);
- return ret;
-}
+ gpu_metrics->average_socket_power = metrics->AverageSocketPower;
+ gpu_metrics->energy_accumulator = metrics->EnergyAccumulator;
-static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu,
- void *table)
-{
- int ret = 0;
+ if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs;
+ else
+ gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs;
- // TODO
+ if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs;
+ else
+ gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;
+
+ gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency;
+ gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency;
+ gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency;
+ gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency;
+
+ gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency;
+ gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK];
+ gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK];
+ gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0];
+ gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0];
+ gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0];
+ gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0];
+
+ gpu_metrics->throttle_status =
+ smu_v14_0_2_get_throttler_status(metrics);
+ gpu_metrics->indep_throttle_status =
+ smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
+ smu_v14_0_2_throttler_map);
+
+ gpu_metrics->current_fan_speed = metrics->AvgFanRpm;
+
+ gpu_metrics->pcie_link_width = metrics->PcieWidth;
+ if ((metrics->PcieRate - 1) > LINK_SPEED_MAX)
+ gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1);
+ else
+ gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate);
- return ret;
+ gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
+
+ gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX];
+ gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC];
+ gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM];
+
+ *table = (void *)gpu_metrics;
+
+ return sizeof(struct gpu_metrics_v1_3);
}
static const struct pptable_funcs smu_v14_0_2_ppt_funcs = {
@@ -1905,6 +1941,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = {
.enable_thermal_alert = smu_v14_0_enable_thermal_alert,
.disable_thermal_alert = smu_v14_0_disable_thermal_alert,
.notify_memory_pool_location = smu_v14_0_notify_memory_pool_location,
+ .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics,
.set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range,
.init_pptable_microcode = smu_v14_0_init_pptable_microcode,
.populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk,
@@ -1933,12 +1970,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = {
.enable_gfx_features = smu_v14_0_2_enable_gfx_features,
.set_mp1_state = smu_v14_0_2_set_mp1_state,
.set_df_cstate = smu_v14_0_2_set_df_cstate,
- .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num,
- .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag,
#if 0
.gpo_control = smu_v14_0_gpo_control,
#endif
- .get_ecc_info = smu_v14_0_2_get_ecc_info,
};
void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 88eefef05fae..91ad434bcdae 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -794,7 +794,7 @@ static const char *smu_get_feature_name(struct smu_context *smu,
size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu,
char *buf)
{
- int8_t sort_feature[max(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)];
+ int8_t sort_feature[MAX(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)];
uint64_t feature_mask;
int i, feature_index;
uint32_t count = 0;
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index 1e9259416980..e6c7f0d64e99 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -158,7 +158,14 @@ void ast_dp_launch(struct drm_device *dev)
ASTDP_HOST_EDID_READ_DONE);
}
+bool ast_dp_power_is_on(struct ast_device *ast)
+{
+ u8 vgacre3;
+
+ vgacre3 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xe3);
+ return !(vgacre3 & AST_DP_PHY_SLEEP);
+}
void ast_dp_power_on_off(struct drm_device *dev, bool on)
{
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index aae019e79bda..225817087b4d 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -391,6 +391,11 @@ static int ast_drm_freeze(struct drm_device *dev)
static int ast_drm_thaw(struct drm_device *dev)
{
+ struct ast_device *ast = to_ast_device(dev);
+
+ ast_enable_vga(ast->ioregs);
+ ast_open_key(ast->ioregs);
+ ast_enable_mmio(dev->dev, ast->ioregs);
ast_post_gpu(dev);
return drm_mode_config_helper_resume(dev);
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index ba3d86973995..47bab5596c16 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -472,6 +472,7 @@ void ast_init_3rdtx(struct drm_device *dev);
bool ast_astdp_is_connected(struct ast_device *ast);
int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata);
void ast_dp_launch(struct drm_device *dev);
+bool ast_dp_power_is_on(struct ast_device *ast);
void ast_dp_power_on_off(struct drm_device *dev, bool no);
void ast_dp_set_on_off(struct drm_device *dev, bool no);
void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode);
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index dc8f639e82fd..049ee1477c33 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -28,6 +28,7 @@
* Authors: Dave Airlie <airlied@redhat.com>
*/
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/pci.h>
@@ -1687,11 +1688,35 @@ static int ast_astdp_connector_helper_detect_ctx(struct drm_connector *connector
struct drm_modeset_acquire_ctx *ctx,
bool force)
{
+ struct drm_device *dev = connector->dev;
struct ast_device *ast = to_ast_device(connector->dev);
+ enum drm_connector_status status = connector_status_disconnected;
+ struct drm_connector_state *connector_state = connector->state;
+ bool is_active = false;
+
+ mutex_lock(&ast->modeset_lock);
+
+ if (connector_state && connector_state->crtc) {
+ struct drm_crtc_state *crtc_state = connector_state->crtc->state;
+
+ if (crtc_state && crtc_state->active)
+ is_active = true;
+ }
+
+ if (!is_active && !ast_dp_power_is_on(ast)) {
+ ast_dp_power_on_off(dev, true);
+ msleep(50);
+ }
if (ast_astdp_is_connected(ast))
- return connector_status_connected;
- return connector_status_disconnected;
+ status = connector_status_connected;
+
+ if (!is_active && status == connector_status_disconnected)
+ ast_dp_power_on_off(dev, false);
+
+ mutex_unlock(&ast->modeset_lock);
+
+ return status;
}
static const struct drm_connector_helper_funcs ast_astdp_connector_helper_funcs = {
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 22bbb2d83e30..7936c2023955 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1070,21 +1070,17 @@ int drm_atomic_set_property(struct drm_atomic_state *state,
break;
}
- if (async_flip && prop != config->prop_fb_id) {
+ if (async_flip &&
+ (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY ||
+ (prop != config->prop_fb_id &&
+ prop != config->prop_in_fence_fd &&
+ prop != config->prop_fb_damage_clips))) {
ret = drm_atomic_plane_get_property(plane, plane_state,
prop, &old_val);
ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop);
break;
}
- if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) {
- drm_dbg_atomic(prop->dev,
- "[OBJECT:%d] Only primary planes can be changed during async flip\n",
- obj->id);
- ret = -EINVAL;
- break;
- }
-
ret = drm_atomic_plane_set_property(plane,
plane_state, file_priv,
prop, prop_value);
diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c
index 0869b663f17e..a4fbf1eb7ac5 100644
--- a/drivers/gpu/drm/drm_bridge_connector.c
+++ b/drivers/gpu/drm/drm_bridge_connector.c
@@ -443,10 +443,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm,
panel_bridge = bridge;
}
- if (connector_type == DRM_MODE_CONNECTOR_Unknown) {
- kfree(bridge_connector);
+ if (connector_type == DRM_MODE_CONNECTOR_Unknown)
return ERR_PTR(-EINVAL);
- }
if (bridge_connector->bridge_hdmi)
ret = drmm_connector_hdmi_init(drm, connector,
@@ -461,10 +459,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm,
ret = drmm_connector_init(drm, connector,
&drm_bridge_connector_funcs,
connector_type, ddc);
- if (ret) {
- kfree(bridge_connector);
+ if (ret)
return ERR_PTR(ret);
- }
drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 6a8e45e9d0ec..103c185bb1c8 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
* drm_buddy_block_trim - free unused pages
*
* @mm: DRM buddy manager
+ * @start: start address to begin the trimming.
* @new_size: original size requested
* @blocks: Input and output list of allocated blocks.
* MUST contain single block as input to be trimmed.
@@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
* 0 on success, error code on failure.
*/
int drm_buddy_block_trim(struct drm_buddy *mm,
+ u64 *start,
u64 new_size,
struct list_head *blocks)
{
struct drm_buddy_block *parent;
struct drm_buddy_block *block;
+ u64 block_start, block_end;
LIST_HEAD(dfs);
u64 new_start;
int err;
@@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
struct drm_buddy_block,
link);
+ block_start = drm_buddy_block_offset(block);
+ block_end = block_start + drm_buddy_block_size(mm, block);
+
if (WARN_ON(!drm_buddy_block_is_allocated(block)))
return -EINVAL;
@@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
if (new_size == drm_buddy_block_size(mm, block))
return 0;
+ new_start = block_start;
+ if (start) {
+ new_start = *start;
+
+ if (new_start < block_start)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(new_start, mm->chunk_size))
+ return -EINVAL;
+
+ if (range_overflows(new_start, new_size, block_end))
+ return -EINVAL;
+ }
+
list_del(&block->link);
mark_free(mm, block);
mm->avail += drm_buddy_block_size(mm, block);
@@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
parent = block->parent;
block->parent = NULL;
- new_start = drm_buddy_block_offset(block);
list_add(&block->tmp_link, &dfs);
err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
@@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
} while (1);
/* Trim the allocated block to the required size */
- if (original_size != size) {
+ if (!(flags & DRM_BUDDY_TRIM_DISABLE) &&
+ original_size != size) {
struct list_head *trim_list;
LIST_HEAD(temp);
u64 trim_size;
@@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
}
drm_buddy_block_trim(mm,
+ NULL,
trim_size,
trim_list);
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 2803ac111bbd..bfedcbf516db 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -355,7 +355,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer,
err_drm_gem_vmap_unlocked:
drm_gem_unlock(gem);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(drm_client_buffer_vmap_local);
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 31af5cf37a09..cee5eafbfb81 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
kfree(modeset->mode);
modeset->mode = drm_mode_duplicate(dev, mode);
+ if (!modeset->mode) {
+ ret = -ENOMEM;
+ break;
+ }
+
drm_connector_get(connector);
modeset->connectors[modeset->num_connectors++] = connector;
modeset->x = offset->x;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 18565ec68451..56ac37ea2f27 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -624,6 +624,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u
static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y,
u32 width, u32 height)
{
+ /*
+ * This function may be invoked by panic() to flush the frame
+ * buffer, where all CPUs except the panic CPU are stopped.
+ * During the following schedule_work(), the panic CPU needs
+ * the worker_pool lock, which might be held by a stopped CPU,
+ * causing schedule_work() and panic() to block. Return early on
+ * oops_in_progress to prevent this blocking.
+ */
+ if (oops_in_progress)
+ return;
+
drm_fb_helper_add_damage_clip(helper, x, y, width, height);
schedule_work(&helper->damage_work);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 3f84d7527793..0830cae9a4d0 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = {
DMI_MATCH(DMI_BOARD_NAME, "KUN"),
},
.driver_data = (void *)&lcd1600x2560_rightside_up,
+ }, { /* AYN Loki Max */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"),
+ },
+ .driver_data = (void *)&lcd1080x1920_leftside_up,
+ }, { /* AYN Loki Zero */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"),
+ },
+ .driver_data = (void *)&lcd1080x1920_leftside_up,
}, { /* Chuwi HiBook (CWI514) */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"),
@@ -414,6 +426,12 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"),
},
.driver_data = (void *)&lcd1600x2560_leftside_up,
+ }, { /* OrangePi Neo */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "NEO-01"),
+ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
}, { /* Samsung GalaxyBook 10.6 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index 071668bfe5d1..6c3333136737 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused)
static int cnp_num_backlight_controllers(struct drm_i915_private *i915)
{
+ if (INTEL_PCH_TYPE(i915) >= PCH_MTL)
+ return 2;
+
if (INTEL_PCH_TYPE(i915) >= PCH_DG1)
return 1;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 59f11af3b0a1..dc75a929d3ed 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5935,6 +5935,18 @@ intel_dp_detect(struct drm_connector *connector,
else
status = connector_status_disconnected;
+ if (status != connector_status_disconnected &&
+ !intel_dp_mst_verify_dpcd_state(intel_dp))
+ /*
+ * This requires retrying detection for instance to re-enable
+ * the MST mode that got reset via a long HPD pulse. The retry
+ * will happen either via the hotplug handler's retry logic,
+ * ensured by setting the connector here to SST/disconnected,
+ * or via a userspace connector probing in response to the
+ * hotplug uevent sent when removing the MST connectors.
+ */
+ status = connector_status_disconnected;
+
if (status == connector_status_disconnected) {
memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd));
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index 2edffe62f360..b0101d72b9c1 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder)
static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector,
int timeout)
{
- struct intel_hdcp *hdcp = &connector->hdcp;
+ struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
+ struct intel_dp *dp = &dig_port->dp;
+ struct intel_hdcp *hdcp = &dp->attached_connector->hdcp;
long ret;
#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count))
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 27ce5c3f5951..17978a1f9ab0 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1998,3 +1998,43 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state,
return false;
}
+
+/*
+ * intel_dp_mst_verify_dpcd_state - verify the MST SW enabled state wrt. the DPCD
+ * @intel_dp: DP port object
+ *
+ * Verify if @intel_dp's MST enabled SW state matches the corresponding DPCD
+ * state. A long HPD pulse - not long enough to be detected as a disconnected
+ * state - could've reset the DPCD state, which requires tearing
+ * down/recreating the MST topology.
+ *
+ * Returns %true if the SW MST enabled and DPCD states match, %false
+ * otherwise.
+ */
+bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp)
+{
+ struct intel_display *display = to_intel_display(intel_dp);
+ struct intel_connector *connector = intel_dp->attached_connector;
+ struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+ struct intel_encoder *encoder = &dig_port->base;
+ int ret;
+ u8 val;
+
+ if (!intel_dp->is_mst)
+ return true;
+
+ ret = drm_dp_dpcd_readb(intel_dp->mst_mgr.aux, DP_MSTM_CTRL, &val);
+
+ /* Adjust the expected register value for SST + SideBand. */
+ if (ret < 0 || val != (DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC)) {
+ drm_dbg_kms(display->drm,
+ "[CONNECTOR:%d:%s][ENCODER:%d:%s] MST mode got reset, removing topology (ret=%d, ctrl=0x%02x)\n",
+ connector->base.base.id, connector->base.name,
+ encoder->base.base.id, encoder->base.name,
+ ret, val);
+
+ return false;
+ }
+
+ return true;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
index 8ca1d599091c..9e4c7679f1c3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -27,5 +27,6 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state,
struct intel_link_bw_limits *limits);
bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state,
struct intel_crtc *crtc);
+bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp);
#endif /* __INTEL_DP_MST_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 90998b037349..292d163036b1 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
}
static int
-skl_ddi_calculate_wrpll(int clock /* in Hz */,
+skl_ddi_calculate_wrpll(int clock,
int ref_clock,
struct skl_wrpll_params *wrpll_params)
{
@@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */,
};
unsigned int dco, d, i;
unsigned int p0, p1, p2;
- u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */
+ u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */
for (d = 0; d < ARRAY_SIZE(dividers); d++) {
for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
@@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state)
struct skl_wrpll_params wrpll_params = {};
int ret;
- ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000,
+ ret = skl_ddi_calculate_wrpll(crtc_state->port_clock,
i915->display.dpll.ref_clks.nssc, &wrpll_params);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
index a568a457e532..f590d7f48ba7 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
@@ -251,7 +251,7 @@
#define HDCP2_STREAM_STATUS(dev_priv, trans, port) \
(TRANS_HDCP(dev_priv) ? \
TRANS_HDCP2_STREAM_STATUS(trans) : \
- PIPE_HDCP2_STREAM_STATUS(pipe))
+ PIPE_HDCP2_STREAM_STATUS(port))
#define _PORTA_HDCP2_AUTH_STREAM 0x66F00
#define _PORTB_HDCP2_AUTH_STREAM 0x66F04
diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c
index 42306bc4ba86..7ce926241e83 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.c
+++ b/drivers/gpu/drm/i915/display/intel_pps.c
@@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915)
if (IS_GEMINILAKE(i915) || IS_BROXTON(i915))
return 2;
+ if (INTEL_PCH_TYPE(i915) >= PCH_MTL)
+ return 2;
+
if (INTEL_PCH_TYPE(i915) >= PCH_DG1)
return 1;
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index eae5b5e09aa8..931d2cf74ed8 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1870,7 +1870,6 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = {
/* Lenovo Yoga Tab 3 Pro YT3-X90F */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"),
},
.driver_data = (void *)vlv_dsi_lenovo_yoga_tab3_backlight_fixup,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a2195e28b625..cac6d4184506 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -290,6 +290,41 @@ out:
return i915_error_to_vmf_fault(err);
}
+static void set_address_limits(struct vm_area_struct *area,
+ struct i915_vma *vma,
+ unsigned long obj_offset,
+ unsigned long *start_vaddr,
+ unsigned long *end_vaddr)
+{
+ unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */
+ long start, end; /* memory boundaries */
+
+ /*
+ * Let's move into the ">> PAGE_SHIFT"
+ * domain to be sure not to lose bits
+ */
+ vm_start = area->vm_start >> PAGE_SHIFT;
+ vm_end = area->vm_end >> PAGE_SHIFT;
+ vma_size = vma->size >> PAGE_SHIFT;
+
+ /*
+ * Calculate the memory boundaries by considering the offset
+ * provided by the user during memory mapping and the offset
+ * provided for the partial mapping.
+ */
+ start = vm_start;
+ start -= obj_offset;
+ start += vma->gtt_view.partial.offset;
+ end = start + vma_size;
+
+ start = max_t(long, start, vm_start);
+ end = min_t(long, end, vm_end);
+
+ /* Let's move back into the "<< PAGE_SHIFT" domain */
+ *start_vaddr = (unsigned long)start << PAGE_SHIFT;
+ *end_vaddr = (unsigned long)end << PAGE_SHIFT;
+}
+
static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
@@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+ unsigned long obj_offset;
+ unsigned long start, end; /* memory boundaries */
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
+ unsigned long pfn;
int srcu;
int ret;
- /* We don't use vmf->pgoff since that has the fake offset */
+ obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+ page_offset += obj_offset;
trace_i915_gem_object_fault(obj, page_offset, true, write);
@@ -402,12 +441,14 @@ retry:
if (ret)
goto err_unpin;
+ set_address_limits(area, vma, obj_offset, &start, &end);
+
+ pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT;
+ pfn += (start - area->vm_start) >> PAGE_SHIFT;
+ pfn += obj_offset - vma->gtt_view.partial.offset;
+
/* Finally, remap it using the new GTT offset */
- ret = remap_io_mapping(area,
- area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
- (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
- min_t(u64, vma->size, area->vm_end - area->vm_start),
- &ggtt->iomap);
+ ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap);
if (ret)
goto err_fence;
@@ -1084,6 +1125,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma
mmo = mmap_offset_attach(obj, mmap_type, NULL);
if (IS_ERR(mmo))
return PTR_ERR(mmo);
+
+ vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node);
}
/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e6f177183c0f..5c72462d1f57 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
obj->mm.region, &places[0], obj->bo_offset,
obj->base.size, flags);
- places[0].flags |= TTM_PL_FLAG_DESIRED;
/* Cache this on object? */
for (i = 0; i < num_allowed; ++i) {
@@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
.interruptible = true,
.no_wait_gpu = false,
};
- int real_num_busy;
+ struct ttm_placement initial_placement;
+ struct ttm_place initial_place;
int ret;
/* First try only the requested placement. No eviction. */
- real_num_busy = placement->num_placement;
- placement->num_placement = 1;
- ret = ttm_bo_validate(bo, placement, &ctx);
+ initial_placement.num_placement = 1;
+ memcpy(&initial_place, placement->placement, sizeof(struct ttm_place));
+ initial_place.flags |= TTM_PL_FLAG_DESIRED;
+ initial_placement.placement = &initial_place;
+ ret = ttm_bo_validate(bo, &initial_placement, &ctx);
if (ret) {
ret = i915_ttm_err_to_gem(ret);
/*
@@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
* If the initial attempt fails, allow all accepted placements,
* evicting if necessary.
*/
- placement->num_placement = real_num_busy;
ret = ttm_bo_validate(bo, placement, &ctx);
if (ret)
return i915_ttm_err_to_gem(ret);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 3b69bc6616bd..551b0d7974ff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s
}
}
+ if (IS_ARROWLAKE(gt->i915)) {
+ bool too_old = false;
+
+ /*
+ * ARL requires a newer firmware than MTL did (102.0.10.1878) but the
+ * firmware is actually common. So, need to do an explicit version check
+ * here rather than using a separate table entry. And if the older
+ * MTL-only version is found, then just don't use GSC rather than aborting
+ * the driver load.
+ */
+ if (gsc->release.major < 102) {
+ too_old = true;
+ } else if (gsc->release.major == 102) {
+ if (gsc->release.minor == 0) {
+ if (gsc->release.patch < 10) {
+ too_old = true;
+ } else if (gsc->release.patch == 10) {
+ if (gsc->release.build < 1878)
+ too_old = true;
+ }
+ }
+ }
+
+ if (too_old) {
+ gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878",
+ gsc->release.major, gsc->release.minor,
+ gsc->release.patch, gsc->release.build);
+ return -EINVAL;
+ }
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d80278eb45d7..ec33ad942115 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt,
const struct firmware *fw,
struct intel_uc_fw *uc_fw)
{
+ int ret;
+
switch (uc_fw->type) {
case INTEL_UC_FW_TYPE_HUC:
- intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
break;
case INTEL_UC_FW_TYPE_GSC:
- intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size);
+ if (ret)
+ return ret;
break;
default:
MISSING_CASE(uc_fw->type);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d7723dd11c80..110340e02a02 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -546,6 +546,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_LUNARLAKE(i915) (0 && i915)
#define IS_BATTLEMAGE(i915) (0 && i915)
+#define IS_ARROWLAKE(i915) \
+ IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL)
#define IS_DG2_G10(i915) \
IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10)
#define IS_DG2_G11(i915) \
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b1cd4c7a525..025a79fe5920 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2749,26 +2749,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
}
static int
-gen12_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config,
- struct i915_active *active)
-{
- struct flex regs[] = {
- {
- GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
- CTX_R_PWR_CLK_STATE,
- },
- };
-
- if (stream->engine->class != RENDER_CLASS)
- return 0;
-
- return oa_configure_all_contexts(stream,
- regs, ARRAY_SIZE(regs),
- active);
-}
-
-static int
lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
@@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
- struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
u32 sqcnt1;
@@ -2919,15 +2898,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
/*
- * Update all contexts prior writing the mux configurations as we need
- * to make sure all slices/subslices are ON before writing to NOA
- * registers.
- */
- ret = gen12_configure_all_contexts(stream, oa_config, active);
- if (ret)
- return ret;
-
- /*
* For Gen12, performance counters are context
* saved/restored. Only enable it for the context that
* requested this.
@@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
_MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING));
}
- /* Reset all contexts' slices/subslices configurations. */
- gen12_configure_all_contexts(stream, NULL, NULL);
-
/* disable the context save/restore or OAR counters */
if (stream->ctx)
gen12_configure_oar_context(stream, NULL);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index d26de37719a7..eede5417cb3f 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -203,6 +203,10 @@ static const u16 subplatform_g12_ids[] = {
INTEL_DG2_G12_IDS(ID),
};
+static const u16 subplatform_arl_ids[] = {
+ INTEL_ARL_IDS(ID),
+};
+
static bool find_devid(u16 id, const u16 *p, unsigned int num)
{
for (; num; num--, p++) {
@@ -260,6 +264,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915)
} else if (find_devid(devid, subplatform_g12_ids,
ARRAY_SIZE(subplatform_g12_ids))) {
mask = BIT(INTEL_SUBPLATFORM_G12);
+ } else if (find_devid(devid, subplatform_arl_ids,
+ ARRAY_SIZE(subplatform_arl_ids))) {
+ mask = BIT(INTEL_SUBPLATFORM_ARL);
}
GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index d1a2abc7e513..df73ef94615d 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -127,6 +127,9 @@ enum intel_platform {
#define INTEL_SUBPLATFORM_N 1
#define INTEL_SUBPLATFORM_RPLU 2
+/* MTL */
+#define INTEL_SUBPLATFORM_ARL 0
+
enum intel_ppgtt_type {
INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE,
INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index ae5c6ec24a1e..77b50c56c124 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm)
}
/* IGT will check if the cursor size is configured */
- drm->mode_config.cursor_width = drm->mode_config.max_width;
- drm->mode_config.cursor_height = drm->mode_config.max_height;
+ drm->mode_config.cursor_width = 512;
+ drm->mode_config.cursor_height = 512;
/* Use OVL device for all DMA memory allocations */
crtc = drm_crtc_from_index(drm, 0);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 1c6626747b98..ecc3fc5cec22 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
* was a bad idea, and is only provided for backwards
* compatibility for older targets.
*/
- return -ENODEV;
+ return -ENOENT;
}
if (IS_ERR(fw)) {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 34c56e855af7..3b171bf227d1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
cstate->num_mixers = num_lm;
- dpu_enc->connector = conn_state->connector;
-
for (i = 0; i < dpu_enc->num_phys_encs; i++) {
struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
@@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
dpu_enc->commit_done_timedout = false;
+ dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc);
+
cur_mode = &dpu_enc->base.crtc->state->adjusted_mode;
dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index fc178ec73907..648c8d0a4c36 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = {
{ \
.maxdwnscale = SSPP_UNITY_SCALE, \
.maxupscale = SSPP_UNITY_SCALE, \
- .format_list = plane_formats_yuv, \
- .num_formats = ARRAY_SIZE(plane_formats_yuv), \
+ .format_list = plane_formats, \
+ .num_formats = ARRAY_SIZE(plane_formats), \
.virt_format_list = plane_formats, \
.virt_num_formats = ARRAY_SIZE(plane_formats), \
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index e2adc937ea63..935ff6fd172c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -31,24 +31,14 @@
* @fmt: Pointer to format string
*/
#define DPU_DEBUG(fmt, ...) \
- do { \
- if (drm_debug_enabled(DRM_UT_KMS)) \
- DRM_DEBUG(fmt, ##__VA_ARGS__); \
- else \
- pr_debug(fmt, ##__VA_ARGS__); \
- } while (0)
+ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
/**
* DPU_DEBUG_DRIVER - macro for hardware driver logging
* @fmt: Pointer to format string
*/
#define DPU_DEBUG_DRIVER(fmt, ...) \
- do { \
- if (drm_debug_enabled(DRM_UT_DRIVER)) \
- DRM_ERROR(fmt, ##__VA_ARGS__); \
- else \
- pr_debug(fmt, ##__VA_ARGS__); \
- } while (0)
+ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
#define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__)
#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 40c4dd2c3139..29298e066163 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane,
new_state->fb, &layout);
if (ret) {
DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret);
+ if (pstate->aspace)
+ msm_framebuffer_cleanup(new_state->fb, pstate->aspace,
+ pstate->needs_dirtyfb);
return ret;
}
@@ -744,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu,
min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1;
if (MSM_FORMAT_IS_YUV(fmt) &&
- (!pipe->sspp->cap->sblk->scaler_blk.len ||
- !pipe->sspp->cap->sblk->csc_blk.len)) {
+ !pipe->sspp->cap->sblk->csc_blk.len) {
DPU_DEBUG_PLANE(pdpu,
- "plane doesn't have scaler/csc for yuv\n");
+ "plane doesn't have csc for yuv\n");
return -EINVAL;
}
@@ -864,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
max_linewidth = pdpu->catalog->caps->max_linewidth;
+ drm_rect_rotate(&pipe_cfg->src_rect,
+ new_plane_state->fb->width, new_plane_state->fb->height,
+ new_plane_state->rotation);
+
if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) ||
_dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) {
/*
@@ -913,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2;
}
+ drm_rect_rotate_inv(&pipe_cfg->src_rect,
+ new_plane_state->fb->width, new_plane_state->fb->height,
+ new_plane_state->rotation);
+ if (r_pipe->sspp)
+ drm_rect_rotate_inv(&r_pipe_cfg->src_rect,
+ new_plane_state->fb->width, new_plane_state->fb->height,
+ new_plane_state->rotation);
+
ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 7bc8a9f0657a..f342fc5ae41e 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
link_info.rate = ctrl->link->link_params.rate;
link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING;
+ dp_link_reset_phy_params_vx_px(ctrl->link);
+
dp_aux_link_configure(ctrl->aux, &link_info);
if (drm_dp_max_downspread(dpcd))
diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
index a916b5f3b317..6ff6c9ef351f 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.c
+++ b/drivers/gpu/drm/msm/dp/dp_panel.c
@@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel)
static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel,
u32 mode_edid_bpp, u32 mode_pclk_khz)
{
- struct dp_link_info *link_info;
+ const struct dp_link_info *link_info;
const u32 max_supported_bpp = 30, min_supported_bpp = 18;
- u32 bpp = 0, data_rate_khz = 0;
+ u32 bpp, data_rate_khz;
- bpp = min_t(u32, mode_edid_bpp, max_supported_bpp);
+ bpp = min(mode_edid_bpp, max_supported_bpp);
link_info = &dp_panel->link_info;
data_rate_khz = link_info->num_lanes * link_info->rate * 8;
- while (bpp > min_supported_bpp) {
+ do {
if (mode_pclk_khz * bpp <= data_rate_khz)
- break;
+ return bpp;
bpp -= 6;
- }
+ } while (bpp > min_supported_bpp);
- return bpp;
+ return min_supported_bpp;
}
int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
@@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel)
drm_mode->clock);
drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp);
- dp_panel->dp_mode.bpp = max_t(u32, 18,
- min_t(u32, dp_panel->dp_mode.bpp, 30));
+ dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp,
+ dp_panel->dp_mode.drm_mode.clock);
+
drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n",
dp_panel->dp_mode.bpp);
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index d90b9471ba6f..faa88fd6eb4d 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = {
.ubwc_enc_version = UBWC_2_0,
.ubwc_dec_version = UBWC_2_0,
.ubwc_static = 0x1e,
- .highest_bank_bit = 0x3,
+ .highest_bank_bit = 0x1,
.reg_bus_bw = 76800,
};
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0712d0b15170..70fb003a6666 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -898,7 +898,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
* Without this the operation can timeout and we'll fallback to a
* software copy, which might take several minutes to finish.
*/
- nouveau_fence_wait(fence, false);
+ nouveau_fence_wait(fence, false, false);
ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
new_reg);
nouveau_fence_unref(&fence);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 66fca95c10c7..7c97b2886807 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan)
ret = nouveau_fence_new(&fence, chan);
if (!ret) {
- ret = nouveau_fence_wait(fence, false);
+ ret = nouveau_fence_wait(fence, false, false);
nouveau_fence_unref(&fence);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 6719353e2e13..6fb65b01d778 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page)
static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
if (fence) {
- nouveau_fence_wait(*fence, false);
+ nouveau_fence_wait(*fence, true, false);
nouveau_fence_unref(fence);
} else {
/*
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ba469767a20f..93f08f9479d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -311,11 +311,39 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait)
return timeout - t;
}
+static int
+nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr)
+{
+ int ret = 0;
+
+ while (!nouveau_fence_done(fence)) {
+ if (time_after_eq(jiffies, fence->timeout)) {
+ ret = -EBUSY;
+ break;
+ }
+
+ __set_current_state(intr ?
+ TASK_INTERRUPTIBLE :
+ TASK_UNINTERRUPTIBLE);
+
+ if (intr && signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ }
+
+ __set_current_state(TASK_RUNNING);
+ return ret;
+}
+
int
-nouveau_fence_wait(struct nouveau_fence *fence, bool intr)
+nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
{
long ret;
+ if (!lazy)
+ return nouveau_fence_wait_busy(fence, intr);
+
ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 1b63197b744a..8bc065acfe35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **);
int nouveau_fence_emit(struct nouveau_fence *);
bool nouveau_fence_done(struct nouveau_fence *);
-int nouveau_fence_wait(struct nouveau_fence *, bool intr);
+int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
struct nouveau_fence_chan {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 2e535caa7d6e..5a887d67dc0e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -928,7 +928,7 @@ revalidate:
}
if (sync) {
- if (!(ret = nouveau_fence_wait(fence, false))) {
+ if (!(ret = nouveau_fence_wait(fence, false, false))) {
if ((ret = dma_fence_get_status(&fence->base)) == 1)
ret = 0;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index b58ab595faf8..cd95446d6851 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -64,7 +64,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
* to the caller, instead of a normal nouveau_bo ttm reference. */
ret = drm_gem_object_init(dev, &nvbo->bo.base, size);
if (ret) {
- nouveau_bo_ref(NULL, &nvbo);
+ drm_gem_object_release(&nvbo->bo.base);
+ kfree(nvbo);
obj = ERR_PTR(-ENOMEM);
goto unlock;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 9402fa320a7e..48f105239f42 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1803,6 +1803,7 @@ nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj);
+ nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0);
return nouveau_bo_validate(nvbo, true, false);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
index adc60b25f8e6..0af01a0ec601 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
@@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw)
break;
case NVKM_FIRMWARE_IMG_DMA:
nvkm_memory_unref(&memory);
- dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys);
+ dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl),
+ fw->img, fw->phys, DMA_TO_DEVICE);
break;
case NVKM_FIRMWARE_IMG_SGT:
nvkm_memory_unref(&memory);
@@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name,
break;
case NVKM_FIRMWARE_IMG_DMA: {
dma_addr_t addr;
-
len = ALIGN(fw->len, PAGE_SIZE);
- fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL);
+ fw->img = dma_alloc_noncoherent(fw->device->dev,
+ len, &addr,
+ DMA_TO_DEVICE,
+ GFP_KERNEL);
if (fw->img) {
memcpy(fw->img, src, fw->len);
fw->phys = addr;
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
index 80a480b12174..a1c8545f1249 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
@@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user,
nvkm_falcon_fw_dtor_sigs(fw);
}
+ /* after last write to the img, sync dma mappings */
+ dma_sync_single_for_device(fw->fw.device->dev,
+ fw->fw.phys,
+ sg_dma_len(&fw->fw.mem.sgl),
+ DMA_TO_DEVICE);
+
FLCNFW_DBG(fw, "resetting");
fw->func->reset(fw);
diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig
index 3f7139e211d2..64e440a2649b 100644
--- a/drivers/gpu/drm/omapdrm/Kconfig
+++ b/drivers/gpu/drm/omapdrm/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_OMAP
tristate "OMAP DRM"
+ depends on MMU
depends on DRM && OF
depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB)
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 1fe6e0d883c7..e5577d2a19ef 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -33,8 +33,10 @@
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"
+#ifndef MIN
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index 2241e53a2946..dec6913cec5b 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector,
const u8 *buffer, size_t len)
{
struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector);
- u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE];
ssize_t i;
if (type != HDMI_INFOFRAME_TYPE_AVI) {
@@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector,
inno_hdmi_disable_frame(connector, type);
for (i = 0; i < len; i++)
- hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i,
- packed_frame[i]);
+ hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]);
return 0;
}
diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c
index c3758faa1b83..d8d0e4d1682f 100644
--- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c
+++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c
@@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test)
sg_init_one(sgt->sgl, buf, TEST_SIZE);
+ /*
+ * Set the DMA mask to 64-bits and map the sgtables
+ * otherwise drm_gem_shmem_free will cause a warning
+ * on debug kernels.
+ */
+ ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64));
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
+ ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0);
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
/* Init a mock DMA-BUF */
buf_mock.size = TEST_SIZE;
attach_mock.dmabuf = &buf_mock;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 49089eefb7c7..a0febdb6f214 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -565,6 +565,10 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo);
void v3d_mmu_remove_ptes(struct v3d_bo *bo);
/* v3d_sched.c */
+void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
+ unsigned int count);
+void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
+ unsigned int count);
void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue);
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 271a6d0f5aca..ad1e6236ff6f 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -73,24 +73,44 @@ v3d_sched_job_free(struct drm_sched_job *sched_job)
v3d_job_cleanup(job);
}
+void
+v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
+ unsigned int count)
+{
+ if (query_info->queries) {
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ drm_syncobj_put(query_info->queries[i].syncobj);
+
+ kvfree(query_info->queries);
+ }
+}
+
+void
+v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
+ unsigned int count)
+{
+ if (query_info->queries) {
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ drm_syncobj_put(query_info->queries[i].syncobj);
+
+ kvfree(query_info->queries);
+ }
+}
+
static void
v3d_cpu_job_free(struct drm_sched_job *sched_job)
{
struct v3d_cpu_job *job = to_cpu_job(sched_job);
- struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
- struct v3d_performance_query_info *performance_query = &job->performance_query;
- if (timestamp_query->queries) {
- for (int i = 0; i < timestamp_query->count; i++)
- drm_syncobj_put(timestamp_query->queries[i].syncobj);
- kvfree(timestamp_query->queries);
- }
+ v3d_timestamp_query_info_free(&job->timestamp_query,
+ job->timestamp_query.count);
- if (performance_query->queries) {
- for (int i = 0; i < performance_query->count; i++)
- drm_syncobj_put(performance_query->queries[i].syncobj);
- kvfree(performance_query->queries);
- }
+ v3d_performance_query_info_free(&job->performance_query,
+ job->performance_query.count);
v3d_job_cleanup(&job->base);
}
@@ -114,6 +134,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
+ preempt_disable();
+
write_seqcount_begin(&local_stats->lock);
local_stats->start_ns = now;
write_seqcount_end(&local_stats->lock);
@@ -121,6 +143,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
write_seqcount_begin(&global_stats->lock);
global_stats->start_ns = now;
write_seqcount_end(&global_stats->lock);
+
+ preempt_enable();
}
static void
@@ -142,8 +166,10 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
+ preempt_disable();
v3d_stats_update(local_stats, now);
v3d_stats_update(global_stats, now);
+ preempt_enable();
}
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
@@ -295,7 +321,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
struct v3d_dev *v3d = job->base.v3d;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
- int i, csd_cfg0_reg, csd_cfg_reg_count;
+ int i, csd_cfg0_reg;
v3d->csd_job = job;
@@ -315,9 +341,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
v3d_switch_perfmon(v3d, &job->base);
csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
- csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7;
- for (i = 1; i <= csd_cfg_reg_count; i++)
+ for (i = 1; i <= 6; i++)
V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
+
+ /* Although V3D 7.1 has an eighth configuration register, we are not
+ * using it. Therefore, make sure it remains unused.
+ *
+ * XXX: Set the CFG7 register
+ */
+ if (v3d->ver >= 71)
+ V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0);
+
/* CFG0 write kicks off the job. */
V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 88f63d526b22..4cdfabbf4964 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -452,6 +452,8 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
{
u32 __user *offsets, *syncs;
struct drm_v3d_timestamp_query timestamp;
+ unsigned int i;
+ int err;
if (!job) {
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
@@ -480,26 +482,34 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
offsets = u64_to_user_ptr(timestamp.offsets);
syncs = u64_to_user_ptr(timestamp.syncs);
- for (int i = 0; i < timestamp.count; i++) {
+ for (i = 0; i < timestamp.count; i++) {
u32 offset, sync;
if (copy_from_user(&offset, offsets++, sizeof(offset))) {
- kvfree(job->timestamp_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->timestamp_query.queries[i].offset = offset;
if (copy_from_user(&sync, syncs++, sizeof(sync))) {
- kvfree(job->timestamp_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+ if (!job->timestamp_query.queries[i].syncobj) {
+ err = -ENOENT;
+ goto error;
+ }
}
job->timestamp_query.count = timestamp.count;
return 0;
+
+error:
+ v3d_timestamp_query_info_free(&job->timestamp_query, i);
+ return err;
}
static int
@@ -509,6 +519,8 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
{
u32 __user *syncs;
struct drm_v3d_reset_timestamp_query reset;
+ unsigned int i;
+ int err;
if (!job) {
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
@@ -533,21 +545,29 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
syncs = u64_to_user_ptr(reset.syncs);
- for (int i = 0; i < reset.count; i++) {
+ for (i = 0; i < reset.count; i++) {
u32 sync;
job->timestamp_query.queries[i].offset = reset.offset + 8 * i;
if (copy_from_user(&sync, syncs++, sizeof(sync))) {
- kvfree(job->timestamp_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+ if (!job->timestamp_query.queries[i].syncobj) {
+ err = -ENOENT;
+ goto error;
+ }
}
job->timestamp_query.count = reset.count;
return 0;
+
+error:
+ v3d_timestamp_query_info_free(&job->timestamp_query, i);
+ return err;
}
/* Get data for the copy timestamp query results job submission. */
@@ -558,7 +578,8 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
{
u32 __user *offsets, *syncs;
struct drm_v3d_copy_timestamp_query copy;
- int i;
+ unsigned int i;
+ int err;
if (!job) {
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
@@ -591,18 +612,22 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
u32 offset, sync;
if (copy_from_user(&offset, offsets++, sizeof(offset))) {
- kvfree(job->timestamp_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->timestamp_query.queries[i].offset = offset;
if (copy_from_user(&sync, syncs++, sizeof(sync))) {
- kvfree(job->timestamp_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+ if (!job->timestamp_query.queries[i].syncobj) {
+ err = -ENOENT;
+ goto error;
+ }
}
job->timestamp_query.count = copy.count;
@@ -613,6 +638,10 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
job->copy.stride = copy.stride;
return 0;
+
+error:
+ v3d_timestamp_query_info_free(&job->timestamp_query, i);
+ return err;
}
static int
@@ -623,6 +652,8 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
u32 __user *syncs;
u64 __user *kperfmon_ids;
struct drm_v3d_reset_performance_query reset;
+ unsigned int i, j;
+ int err;
if (!job) {
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
@@ -637,6 +668,9 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
if (copy_from_user(&reset, ext, sizeof(reset)))
return -EFAULT;
+ if (reset.nperfmons > V3D_MAX_PERFMONS)
+ return -EINVAL;
+
job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;
job->performance_query.queries = kvmalloc_array(reset.count,
@@ -648,39 +682,47 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
syncs = u64_to_user_ptr(reset.syncs);
kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids);
- for (int i = 0; i < reset.count; i++) {
+ for (i = 0; i < reset.count; i++) {
u32 sync;
u64 ids;
u32 __user *ids_pointer;
u32 id;
if (copy_from_user(&sync, syncs++, sizeof(sync))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
- job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
-
if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
ids_pointer = u64_to_user_ptr(ids);
- for (int j = 0; j < reset.nperfmons; j++) {
+ for (j = 0; j < reset.nperfmons; j++) {
if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->performance_query.queries[i].kperfmon_ids[j] = id;
}
+
+ job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+ if (!job->performance_query.queries[i].syncobj) {
+ err = -ENOENT;
+ goto error;
+ }
}
job->performance_query.count = reset.count;
job->performance_query.nperfmons = reset.nperfmons;
return 0;
+
+error:
+ v3d_performance_query_info_free(&job->performance_query, i);
+ return err;
}
static int
@@ -691,6 +733,8 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
u32 __user *syncs;
u64 __user *kperfmon_ids;
struct drm_v3d_copy_performance_query copy;
+ unsigned int i, j;
+ int err;
if (!job) {
DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
@@ -708,6 +752,9 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
if (copy.pad)
return -EINVAL;
+ if (copy.nperfmons > V3D_MAX_PERFMONS)
+ return -EINVAL;
+
job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;
job->performance_query.queries = kvmalloc_array(copy.count,
@@ -719,34 +766,38 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
syncs = u64_to_user_ptr(copy.syncs);
kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);
- for (int i = 0; i < copy.count; i++) {
+ for (i = 0; i < copy.count; i++) {
u32 sync;
u64 ids;
u32 __user *ids_pointer;
u32 id;
if (copy_from_user(&sync, syncs++, sizeof(sync))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
- job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
-
if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
ids_pointer = u64_to_user_ptr(ids);
- for (int j = 0; j < copy.nperfmons; j++) {
+ for (j = 0; j < copy.nperfmons; j++) {
if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
- kvfree(job->performance_query.queries);
- return -EFAULT;
+ err = -EFAULT;
+ goto error;
}
job->performance_query.queries[i].kperfmon_ids[j] = id;
}
+
+ job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+ if (!job->performance_query.queries[i].syncobj) {
+ err = -ENOENT;
+ goto error;
+ }
}
job->performance_query.count = copy.count;
job->performance_query.nperfmons = copy.nperfmons;
@@ -759,6 +810,10 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
job->copy.stride = copy.stride;
return 0;
+
+error:
+ v3d_performance_query_info_free(&job->performance_query, i);
+ return err;
}
/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c
index 1c7c7f61a222..7d34cf83f5f2 100644
--- a/drivers/gpu/drm/virtio/virtgpu_submit.c
+++ b/drivers/gpu/drm/virtio/virtgpu_submit.c
@@ -48,7 +48,7 @@ struct virtio_gpu_submit {
static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit,
struct dma_fence *in_fence)
{
- u32 context = submit->fence_ctx + submit->ring_idx;
+ u64 context = submit->fence_ctx + submit->ring_idx;
if (dma_fence_match_context(in_fence, context))
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h
index b0d87c5f58d8..1ac3cb151b11 100644
--- a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h
+++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h
@@ -1,6 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**********************************************************
- * Copyright 2021 VMware, Inc.
- * SPDX-License-Identifier: GPL-2.0 OR MIT
+ *
+ * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -31,6 +33,10 @@
#include <drm/vmwgfx_drm.h>
+#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32)
+#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \
+ ((svga3d_flags) & ((uint64_t)U32_MAX))
+
static inline u32 clamped_umul32(u32 a, u32 b)
{
uint64_t tmp = (uint64_t) a*b;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
index 717d624e9a05..890a66a2361f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
@@ -27,6 +27,8 @@
**************************************************************************/
#include "vmwgfx_drv.h"
+
+#include "vmwgfx_bo.h"
#include <linux/highmem.h>
/*
@@ -420,13 +422,105 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
return 0;
}
+static void *map_external(struct vmw_bo *bo, struct iosys_map *map)
+{
+ struct vmw_private *vmw =
+ container_of(bo->tbo.bdev, struct vmw_private, bdev);
+ void *ptr = NULL;
+ int ret;
+
+ if (bo->tbo.base.import_attach) {
+ ret = dma_buf_vmap(bo->tbo.base.dma_buf, map);
+ if (ret) {
+ drm_dbg_driver(&vmw->drm,
+ "Wasn't able to map external bo!\n");
+ goto out;
+ }
+ ptr = map->vaddr;
+ } else {
+ ptr = vmw_bo_map_and_cache(bo);
+ }
+
+out:
+ return ptr;
+}
+
+static void unmap_external(struct vmw_bo *bo, struct iosys_map *map)
+{
+ if (bo->tbo.base.import_attach)
+ dma_buf_vunmap(bo->tbo.base.dma_buf, map);
+ else
+ vmw_bo_unmap(bo);
+}
+
+static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset,
+ u32 dst_stride, struct vmw_bo *src,
+ u32 src_offset, u32 src_stride,
+ u32 width_in_bytes, u32 height,
+ struct vmw_diff_cpy *diff)
+{
+ struct vmw_private *vmw =
+ container_of(dst->tbo.bdev, struct vmw_private, bdev);
+ size_t dst_size = dst->tbo.resource->size;
+ size_t src_size = src->tbo.resource->size;
+ struct iosys_map dst_map = {0};
+ struct iosys_map src_map = {0};
+ int ret, i;
+ int x_in_bytes;
+ u8 *vsrc;
+ u8 *vdst;
+
+ vsrc = map_external(src, &src_map);
+ if (!vsrc) {
+ drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vdst = map_external(dst, &dst_map);
+ if (!vdst) {
+ drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsrc += src_offset;
+ vdst += dst_offset;
+ if (src_stride == dst_stride) {
+ dst_size -= dst_offset;
+ src_size -= src_offset;
+ memcpy(vdst, vsrc,
+ min(dst_stride * height, min(dst_size, src_size)));
+ } else {
+ WARN_ON(dst_stride < width_in_bytes);
+ for (i = 0; i < height; ++i) {
+ memcpy(vdst, vsrc, width_in_bytes);
+ vsrc += src_stride;
+ vdst += dst_stride;
+ }
+ }
+
+ x_in_bytes = (dst_offset % dst_stride);
+ diff->rect.x1 = x_in_bytes / diff->cpp;
+ diff->rect.y1 = ((dst_offset - x_in_bytes) / dst_stride);
+ diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp;
+ diff->rect.y2 = diff->rect.y1 + height;
+
+ ret = 0;
+out:
+ unmap_external(src, &src_map);
+ unmap_external(dst, &dst_map);
+
+ return ret;
+}
+
/**
* vmw_bo_cpu_blit - in-kernel cpu blit.
*
- * @dst: Destination buffer object.
+ * @vmw_dst: Destination buffer object.
* @dst_offset: Destination offset of blit start in bytes.
* @dst_stride: Destination stride in bytes.
- * @src: Source buffer object.
+ * @vmw_src: Source buffer object.
* @src_offset: Source offset of blit start in bytes.
* @src_stride: Source stride in bytes.
* @w: Width of blit.
@@ -444,13 +538,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
* Neither of the buffer objects may be placed in PCI memory
* (Fixed memory in TTM terminology) when using this function.
*/
-int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
+int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst,
u32 dst_offset, u32 dst_stride,
- struct ttm_buffer_object *src,
+ struct vmw_bo *vmw_src,
u32 src_offset, u32 src_stride,
u32 w, u32 h,
struct vmw_diff_cpy *diff)
{
+ struct ttm_buffer_object *src = &vmw_src->tbo;
+ struct ttm_buffer_object *dst = &vmw_dst->tbo;
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false
@@ -460,6 +556,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
int ret = 0;
struct page **dst_pages = NULL;
struct page **src_pages = NULL;
+ bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+ bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+ if (WARN_ON(dst == src))
+ return -EINVAL;
/* Buffer objects need to be either pinned or reserved: */
if (!(dst->pin_count))
@@ -479,6 +580,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
return ret;
}
+ if (src_external || dst_external)
+ return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride,
+ vmw_src, src_offset, src_stride,
+ w, h, diff);
+
if (!src->ttm->pages && src->ttm->sg) {
src_pages = kvmalloc_array(src->ttm->num_pages,
sizeof(struct page *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 00144632c600..a0e433fbcba6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright © 2011-2023 VMware, Inc., Palo Alto, CA., USA
- * All Rights Reserved.
+ * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -28,15 +28,39 @@
#include "vmwgfx_bo.h"
#include "vmwgfx_drv.h"
-
+#include "vmwgfx_resource_priv.h"
#include <drm/ttm/ttm_placement.h>
static void vmw_bo_release(struct vmw_bo *vbo)
{
+ struct vmw_resource *res;
+
WARN_ON(vbo->tbo.base.funcs &&
kref_read(&vbo->tbo.base.refcount) != 0);
vmw_bo_unmap(vbo);
+
+ xa_destroy(&vbo->detached_resources);
+ WARN_ON(vbo->is_dumb && !vbo->dumb_surface);
+ if (vbo->is_dumb && vbo->dumb_surface) {
+ res = &vbo->dumb_surface->res;
+ WARN_ON(vbo != res->guest_memory_bo);
+ WARN_ON(!res->guest_memory_bo);
+ if (res->guest_memory_bo) {
+ /* Reserve and switch the backing mob. */
+ mutex_lock(&res->dev_priv->cmdbuf_mutex);
+ (void)vmw_resource_reserve(res, false, true);
+ vmw_resource_mob_detach(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->guest_memory_bo);
+ res->guest_memory_bo = NULL;
+ res->guest_memory_offset = 0;
+ vmw_resource_unreserve(res, false, false, false, NULL,
+ 0);
+ mutex_unlock(&res->dev_priv->cmdbuf_mutex);
+ }
+ vmw_surface_unreference(&vbo->dumb_surface);
+ }
drm_gem_object_release(&vbo->tbo.base);
}
@@ -326,18 +350,26 @@ void vmw_bo_pin_reserved(struct vmw_bo *vbo, bool pin)
*/
void *vmw_bo_map_and_cache(struct vmw_bo *vbo)
{
+ return vmw_bo_map_and_cache_size(vbo, vbo->tbo.base.size);
+}
+
+void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size)
+{
struct ttm_buffer_object *bo = &vbo->tbo;
bool not_used;
void *virtual;
int ret;
+ atomic_inc(&vbo->map_count);
+
virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used);
if (virtual)
return virtual;
- ret = ttm_bo_kmap(bo, 0, PFN_UP(bo->base.size), &vbo->map);
+ ret = ttm_bo_kmap(bo, 0, PFN_UP(size), &vbo->map);
if (ret)
- DRM_ERROR("Buffer object map failed: %d.\n", ret);
+ DRM_ERROR("Buffer object map failed: %d (size: bo = %zu, map = %zu).\n",
+ ret, bo->base.size, size);
return ttm_kmap_obj_virtual(&vbo->map, &not_used);
}
@@ -353,11 +385,17 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo)
*/
void vmw_bo_unmap(struct vmw_bo *vbo)
{
+ int map_count;
+
if (vbo->map.bo == NULL)
return;
- ttm_bo_kunmap(&vbo->map);
- vbo->map.bo = NULL;
+ map_count = atomic_dec_return(&vbo->map_count);
+
+ if (!map_count) {
+ ttm_bo_kunmap(&vbo->map);
+ vbo->map.bo = NULL;
+ }
}
@@ -390,6 +428,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->tbo.priority = 3;
vmw_bo->res_tree = RB_ROOT;
+ xa_init(&vmw_bo->detached_resources);
+ atomic_set(&vmw_bo->map_count, 0);
params->size = ALIGN(params->size, PAGE_SIZE);
drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size);
@@ -654,52 +694,6 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
dma_fence_put(&fence->base);
}
-
-/**
- * vmw_dumb_create - Create a dumb kms buffer
- *
- * @file_priv: Pointer to a struct drm_file identifying the caller.
- * @dev: Pointer to the drm device.
- * @args: Pointer to a struct drm_mode_create_dumb structure
- * Return: Zero on success, negative error code on failure.
- *
- * This is a driver callback for the core drm create_dumb functionality.
- * Note that this is very similar to the vmw_bo_alloc ioctl, except
- * that the arguments have a different format.
- */
-int vmw_dumb_create(struct drm_file *file_priv,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args)
-{
- struct vmw_private *dev_priv = vmw_priv(dev);
- struct vmw_bo *vbo;
- int cpp = DIV_ROUND_UP(args->bpp, 8);
- int ret;
-
- switch (cpp) {
- case 1: /* DRM_FORMAT_C8 */
- case 2: /* DRM_FORMAT_RGB565 */
- case 4: /* DRM_FORMAT_XRGB8888 */
- break;
- default:
- /*
- * Dumb buffers don't allow anything else.
- * This is tested via IGT's dumb_buffers
- */
- return -EINVAL;
- }
-
- args->pitch = args->width * cpp;
- args->size = ALIGN(args->pitch * args->height, PAGE_SIZE);
-
- ret = vmw_gem_object_create_with_handle(dev_priv, file_priv,
- args->size, &args->handle,
- &vbo);
- /* drop reference from allocate - handle holds it now */
- drm_gem_object_put(&vbo->tbo.base);
- return ret;
-}
-
/**
* vmw_bo_swap_notify - swapout notify callback.
*
@@ -853,3 +847,43 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo)
vmw_bo_placement_set(bo, domain, domain);
}
+
+void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res)
+{
+ xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL);
+}
+
+void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res)
+{
+ xa_erase(&vbo->detached_resources, (unsigned long)res);
+}
+
+struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo)
+{
+ unsigned long index;
+ struct vmw_resource *res = NULL;
+ struct vmw_surface *surf = NULL;
+ struct rb_node *rb_itr = vbo->res_tree.rb_node;
+
+ if (vbo->is_dumb && vbo->dumb_surface) {
+ res = &vbo->dumb_surface->res;
+ goto out;
+ }
+
+ xa_for_each(&vbo->detached_resources, index, res) {
+ if (res->func->res_type == vmw_res_surface)
+ goto out;
+ }
+
+ for (rb_itr = rb_first(&vbo->res_tree); rb_itr;
+ rb_itr = rb_next(rb_itr)) {
+ res = rb_entry(rb_itr, struct vmw_resource, mob_node);
+ if (res->func->res_type == vmw_res_surface)
+ goto out;
+ }
+
+out:
+ if (res)
+ surf = vmw_res_to_srf(res);
+ return surf;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
index f349642e6190..43b5439ec9f7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
*
- * Copyright 2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2023-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -35,11 +36,13 @@
#include <linux/rbtree_types.h>
#include <linux/types.h>
+#include <linux/xarray.h>
struct vmw_bo_dirty;
struct vmw_fence_obj;
struct vmw_private;
struct vmw_resource;
+struct vmw_surface;
enum vmw_bo_domain {
VMW_BO_DOMAIN_SYS = BIT(0),
@@ -68,6 +71,8 @@ struct vmw_bo_params {
* @map: Kmap object for semi-persistent mappings
* @res_tree: RB tree of resources using this buffer object as a backing MOB
* @res_prios: Eviction priority counts for attached resources
+ * @map_count: The number of currently active maps. Will differ from the
+ * cpu_writers because it includes kernel maps.
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
@@ -85,11 +90,16 @@ struct vmw_bo {
struct rb_root res_tree;
u32 res_prios[TTM_MAX_BO_PRIORITY];
+ struct xarray detached_resources;
+ atomic_t map_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
struct vmw_resource *dx_query_ctx;
struct vmw_bo_dirty *dirty;
+
+ bool is_dumb;
+ struct vmw_surface *dumb_surface;
};
void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain);
@@ -124,15 +134,21 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
struct vmw_fence_obj *fence);
void *vmw_bo_map_and_cache(struct vmw_bo *vbo);
+void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size);
void vmw_bo_unmap(struct vmw_bo *vbo);
void vmw_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_resource *mem);
void vmw_bo_swap_notify(struct ttm_buffer_object *bo);
+void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res);
+void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res);
+struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo);
+
int vmw_user_bo_lookup(struct drm_file *filp,
u32 handle,
struct vmw_bo **out);
+
/**
* vmw_bo_adjust_prio - Adjust the buffer object eviction priority
* according to attached resources
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a1ce41e1c468..3f4719b3c268 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -763,6 +764,26 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv,
extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id);
/**
+ * User handles
+ */
+struct vmw_user_object {
+ struct vmw_surface *surface;
+ struct vmw_bo *buffer;
+};
+
+int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp,
+ u32 handle, struct vmw_user_object *uo);
+struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo);
+void vmw_user_object_unref(struct vmw_user_object *uo);
+bool vmw_user_object_is_null(struct vmw_user_object *uo);
+struct vmw_surface *vmw_user_object_surface(struct vmw_user_object *uo);
+struct vmw_bo *vmw_user_object_buffer(struct vmw_user_object *uo);
+void *vmw_user_object_map(struct vmw_user_object *uo);
+void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size);
+void vmw_user_object_unmap(struct vmw_user_object *uo);
+bool vmw_user_object_is_mapped(struct vmw_user_object *uo);
+
+/**
* Resource utilities - vmwgfx_resource.c
*/
struct vmw_user_resource_conv;
@@ -776,11 +797,6 @@ extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
-extern int vmw_user_lookup_handle(struct vmw_private *dev_priv,
- struct drm_file *filp,
- uint32_t handle,
- struct vmw_surface **out_surf,
- struct vmw_bo **out_buf);
extern int vmw_user_resource_lookup_handle(
struct vmw_private *dev_priv,
struct ttm_object_file *tfile,
@@ -1057,9 +1073,6 @@ int vmw_kms_suspend(struct drm_device *dev);
int vmw_kms_resume(struct drm_device *dev);
void vmw_kms_lost_device(struct drm_device *dev);
-int vmw_dumb_create(struct drm_file *file_priv,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args);
extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible);
extern void vmw_resource_unpin(struct vmw_resource *res);
extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res);
@@ -1176,6 +1189,15 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev,
int vmw_gb_surface_define(struct vmw_private *dev_priv,
const struct vmw_surface_metadata *req,
struct vmw_surface **srf_out);
+struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw,
+ struct vmw_bo *bo,
+ u32 handle);
+u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw,
+ struct vmw_bo *bo,
+ u32 handle);
+int vmw_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
/*
* Shader management - vmwgfx_shader.c
@@ -1331,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n);
-int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
+int vmw_bo_cpu_blit(struct vmw_bo *dst,
u32 dst_offset, u32 dst_stride,
- struct ttm_buffer_object *src,
+ struct vmw_bo *src,
u32 src_offset, u32 src_stride,
u32 w, u32 h,
struct vmw_diff_cpy *diff);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 5efc6a766f64..588d50ababf6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -32,7 +32,6 @@
#define VMW_FENCE_WRAP (1 << 31)
struct vmw_fence_manager {
- int num_fence_objects;
struct vmw_private *dev_priv;
spinlock_t lock;
struct list_head fence_list;
@@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f)
{
struct vmw_fence_obj *fence =
container_of(f, struct vmw_fence_obj, base);
-
struct vmw_fence_manager *fman = fman_from_fence(fence);
- spin_lock(&fman->lock);
- list_del_init(&fence->head);
- --fman->num_fence_objects;
- spin_unlock(&fman->lock);
+ if (!list_empty(&fence->head)) {
+ spin_lock(&fman->lock);
+ list_del_init(&fence->head);
+ spin_unlock(&fman->lock);
+ }
fence->destroy(fence);
}
@@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = {
.release = vmw_fence_obj_destroy,
};
-
/*
* Execute signal actions on fences recently signaled.
* This is done from a workqueue so we don't have to execute
@@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
goto out_unlock;
}
list_add_tail(&fence->head, &fman->fence_list);
- ++fman->num_fence_objects;
out_unlock:
spin_unlock(&fman->lock);
@@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
u32 passed_seqno)
{
u32 goal_seqno;
- struct vmw_fence_obj *fence;
+ struct vmw_fence_obj *fence, *next_fence;
if (likely(!fman->seqno_valid))
return false;
@@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
return false;
fman->seqno_valid = false;
- list_for_each_entry(fence, &fman->fence_list, head) {
+ list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
if (!list_empty(&fence->seq_passed_actions)) {
fman->seqno_valid = true;
vmw_fence_goal_write(fman->dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index 07185c108218..b9857f37ca1a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
- * Copyright 2021-2023 VMware, Inc.
+ * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -78,6 +79,59 @@ static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj)
return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages);
}
+static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+ struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj);
+ int ret;
+
+ if (obj->import_attach) {
+ ret = dma_buf_vmap(obj->import_attach->dmabuf, map);
+ if (!ret) {
+ if (drm_WARN_ON(obj->dev, map->is_iomem)) {
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
+ return -EIO;
+ }
+ }
+ } else {
+ ret = ttm_bo_vmap(bo, map);
+ }
+
+ return ret;
+}
+
+static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+ if (obj->import_attach)
+ dma_buf_vunmap(obj->import_attach->dmabuf, map);
+ else
+ drm_gem_ttm_vunmap(obj, map);
+}
+
+static int vmw_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ int ret;
+
+ if (obj->import_attach) {
+ /*
+ * Reset both vm_ops and vm_private_data, so we don't end up with
+ * vm_ops pointing to our implementation if the dma-buf backend
+ * doesn't set those fields.
+ */
+ vma->vm_private_data = NULL;
+ vma->vm_ops = NULL;
+
+ ret = dma_buf_mmap(obj->dma_buf, vma, 0);
+
+ /* Drop the reference drm_gem_mmap_obj() acquired.*/
+ if (!ret)
+ drm_gem_object_put(obj);
+
+ return ret;
+ }
+
+ return drm_gem_ttm_mmap(obj, vma);
+}
+
static const struct vm_operations_struct vmw_vm_ops = {
.pfn_mkwrite = vmw_bo_vm_mkwrite,
.page_mkwrite = vmw_bo_vm_mkwrite,
@@ -94,9 +148,9 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = {
.pin = vmw_gem_object_pin,
.unpin = vmw_gem_object_unpin,
.get_sg_table = vmw_gem_object_get_sg_table,
- .vmap = drm_gem_ttm_vmap,
- .vunmap = drm_gem_ttm_vunmap,
- .mmap = drm_gem_ttm_mmap,
+ .vmap = vmw_gem_vmap,
+ .vunmap = vmw_gem_vunmap,
+ .mmap = vmw_gem_mmap,
.vm_ops = &vmw_vm_ops,
};
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 00c4ff684130..288ed0bb75cb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -193,13 +194,16 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h)
*/
static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps)
{
- if (vps->surf) {
- if (vps->surf_mapped)
- return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo);
- return vps->surf->snooper.image;
- } else if (vps->bo)
- return vmw_bo_map_and_cache(vps->bo);
- return NULL;
+ struct vmw_surface *surf;
+
+ if (vmw_user_object_is_null(&vps->uo))
+ return NULL;
+
+ surf = vmw_user_object_surface(&vps->uo);
+ if (surf && !vmw_user_object_is_mapped(&vps->uo))
+ return surf->snooper.image;
+
+ return vmw_user_object_map(&vps->uo);
}
static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps,
@@ -536,22 +540,16 @@ void vmw_du_primary_plane_destroy(struct drm_plane *plane)
* vmw_du_plane_unpin_surf - unpins resource associated with a framebuffer surface
*
* @vps: plane state associated with the display surface
- * @unreference: true if we also want to unreference the display.
*/
-void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps,
- bool unreference)
+void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps)
{
- if (vps->surf) {
+ struct vmw_surface *surf = vmw_user_object_surface(&vps->uo);
+
+ if (surf) {
if (vps->pinned) {
- vmw_resource_unpin(&vps->surf->res);
+ vmw_resource_unpin(&surf->res);
vps->pinned--;
}
-
- if (unreference) {
- if (vps->pinned)
- DRM_ERROR("Surface still pinned\n");
- vmw_surface_unreference(&vps->surf);
- }
}
}
@@ -572,7 +570,7 @@ vmw_du_plane_cleanup_fb(struct drm_plane *plane,
{
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
- vmw_du_plane_unpin_surf(vps, false);
+ vmw_du_plane_unpin_surf(vps);
}
@@ -661,25 +659,14 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane,
struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane);
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
- if (vps->surf_mapped) {
- vmw_bo_unmap(vps->surf->res.guest_memory_bo);
- vps->surf_mapped = false;
- }
+ if (!vmw_user_object_is_null(&vps->uo))
+ vmw_user_object_unmap(&vps->uo);
vmw_du_cursor_plane_unmap_cm(vps);
vmw_du_put_cursor_mob(vcp, vps);
- vmw_du_plane_unpin_surf(vps, false);
-
- if (vps->surf) {
- vmw_surface_unreference(&vps->surf);
- vps->surf = NULL;
- }
-
- if (vps->bo) {
- vmw_bo_unreference(&vps->bo);
- vps->bo = NULL;
- }
+ vmw_du_plane_unpin_surf(vps);
+ vmw_user_object_unref(&vps->uo);
}
@@ -698,64 +685,48 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
struct drm_framebuffer *fb = new_state->fb;
struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane);
struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state);
+ struct vmw_bo *bo = NULL;
int ret = 0;
- if (vps->surf) {
- if (vps->surf_mapped) {
- vmw_bo_unmap(vps->surf->res.guest_memory_bo);
- vps->surf_mapped = false;
- }
- vmw_surface_unreference(&vps->surf);
- vps->surf = NULL;
- }
-
- if (vps->bo) {
- vmw_bo_unreference(&vps->bo);
- vps->bo = NULL;
+ if (!vmw_user_object_is_null(&vps->uo)) {
+ vmw_user_object_unmap(&vps->uo);
+ vmw_user_object_unref(&vps->uo);
}
if (fb) {
if (vmw_framebuffer_to_vfb(fb)->bo) {
- vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer;
- vmw_bo_reference(vps->bo);
+ vps->uo.buffer = vmw_framebuffer_to_vfbd(fb)->buffer;
+ vps->uo.surface = NULL;
} else {
- vps->surf = vmw_framebuffer_to_vfbs(fb)->surface;
- vmw_surface_reference(vps->surf);
+ memcpy(&vps->uo, &vmw_framebuffer_to_vfbs(fb)->uo, sizeof(vps->uo));
}
+ vmw_user_object_ref(&vps->uo);
}
- if (!vps->surf && vps->bo) {
- const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32);
+ bo = vmw_user_object_buffer(&vps->uo);
+ if (bo) {
+ struct ttm_operation_ctx ctx = {false, false};
- /*
- * Not using vmw_bo_map_and_cache() helper here as we need to
- * reserve the ttm_buffer_object first which
- * vmw_bo_map_and_cache() omits.
- */
- ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL);
-
- if (unlikely(ret != 0))
+ ret = ttm_bo_reserve(&bo->tbo, true, false, NULL);
+ if (ret != 0)
return -ENOMEM;
- ret = ttm_bo_kmap(&vps->bo->tbo, 0, PFN_UP(size), &vps->bo->map);
-
- ttm_bo_unreserve(&vps->bo->tbo);
-
- if (unlikely(ret != 0))
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret != 0)
return -ENOMEM;
- } else if (vps->surf && !vps->bo && vps->surf->res.guest_memory_bo) {
- WARN_ON(vps->surf->snooper.image);
- ret = ttm_bo_reserve(&vps->surf->res.guest_memory_bo->tbo, true, false,
- NULL);
- if (unlikely(ret != 0))
- return -ENOMEM;
- vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo);
- ttm_bo_unreserve(&vps->surf->res.guest_memory_bo->tbo);
- vps->surf_mapped = true;
+ vmw_bo_pin_reserved(bo, true);
+ if (vmw_framebuffer_to_vfb(fb)->bo) {
+ const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32);
+
+ (void)vmw_bo_map_and_cache_size(bo, size);
+ } else {
+ vmw_bo_map_and_cache(bo);
+ }
+ ttm_bo_unreserve(&bo->tbo);
}
- if (vps->surf || vps->bo) {
+ if (!vmw_user_object_is_null(&vps->uo)) {
vmw_du_get_cursor_mob(vcp, vps);
vmw_du_cursor_plane_map_cm(vps);
}
@@ -777,14 +748,17 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state);
struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state);
+ struct vmw_bo *old_bo = NULL;
+ struct vmw_bo *new_bo = NULL;
s32 hotspot_x, hotspot_y;
+ int ret;
hotspot_x = du->hotspot_x + new_state->hotspot_x;
hotspot_y = du->hotspot_y + new_state->hotspot_y;
- du->cursor_surface = vps->surf;
+ du->cursor_surface = vmw_user_object_surface(&vps->uo);
- if (!vps->surf && !vps->bo) {
+ if (vmw_user_object_is_null(&vps->uo)) {
vmw_cursor_update_position(dev_priv, false, 0, 0);
return;
}
@@ -792,10 +766,26 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
vps->cursor.hotspot_x = hotspot_x;
vps->cursor.hotspot_y = hotspot_y;
- if (vps->surf) {
+ if (du->cursor_surface)
du->cursor_age = du->cursor_surface->snooper.age;
+
+ if (!vmw_user_object_is_null(&old_vps->uo)) {
+ old_bo = vmw_user_object_buffer(&old_vps->uo);
+ ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL);
+ if (ret != 0)
+ return;
}
+ if (!vmw_user_object_is_null(&vps->uo)) {
+ new_bo = vmw_user_object_buffer(&vps->uo);
+ if (old_bo != new_bo) {
+ ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL);
+ if (ret != 0)
+ return;
+ } else {
+ new_bo = NULL;
+ }
+ }
if (!vmw_du_cursor_plane_has_changed(old_vps, vps)) {
/*
* If it hasn't changed, avoid making the device do extra
@@ -813,6 +803,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
hotspot_x, hotspot_y);
}
+ if (old_bo)
+ ttm_bo_unreserve(&old_bo->tbo);
+ if (new_bo)
+ ttm_bo_unreserve(&new_bo->tbo);
+
du->cursor_x = new_state->crtc_x + du->set_gui_x;
du->cursor_y = new_state->crtc_y + du->set_gui_y;
@@ -913,7 +908,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
}
if (!vmw_framebuffer_to_vfb(fb)->bo) {
- surface = vmw_framebuffer_to_vfbs(fb)->surface;
+ surface = vmw_user_object_surface(&vmw_framebuffer_to_vfbs(fb)->uo);
WARN_ON(!surface);
@@ -1074,12 +1069,7 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
memset(&vps->cursor, 0, sizeof(vps->cursor));
/* Each ref counted resource needs to be acquired again */
- if (vps->surf)
- (void) vmw_surface_reference(vps->surf);
-
- if (vps->bo)
- (void) vmw_bo_reference(vps->bo);
-
+ vmw_user_object_ref(&vps->uo);
state = &vps->base;
__drm_atomic_helper_plane_duplicate_state(plane, state);
@@ -1128,11 +1118,7 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
struct vmw_plane_state *vps = vmw_plane_state_to_vps(state);
/* Should have been freed by cleanup_fb */
- if (vps->surf)
- vmw_surface_unreference(&vps->surf);
-
- if (vps->bo)
- vmw_bo_unreference(&vps->bo);
+ vmw_user_object_unref(&vps->uo);
drm_atomic_helper_plane_destroy_state(plane, state);
}
@@ -1227,7 +1213,7 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
vmw_framebuffer_to_vfbs(framebuffer);
drm_framebuffer_cleanup(framebuffer);
- vmw_surface_unreference(&vfbs->surface);
+ vmw_user_object_unref(&vfbs->uo);
kfree(vfbs);
}
@@ -1272,29 +1258,41 @@ int vmw_kms_readback(struct vmw_private *dev_priv,
return -ENOSYS;
}
+static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
+{
+ struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb);
+ struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo);
+
+ return drm_gem_handle_create(file_priv, &bo->tbo.base, handle);
+}
static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = {
+ .create_handle = vmw_framebuffer_surface_create_handle,
.destroy = vmw_framebuffer_surface_destroy,
.dirty = drm_atomic_helper_dirtyfb,
};
static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
- struct vmw_surface *surface,
+ struct vmw_user_object *uo,
struct vmw_framebuffer **out,
const struct drm_mode_fb_cmd2
- *mode_cmd,
- bool is_bo_proxy)
+ *mode_cmd)
{
struct drm_device *dev = &dev_priv->drm;
struct vmw_framebuffer_surface *vfbs;
enum SVGA3dSurfaceFormat format;
+ struct vmw_surface *surface;
int ret;
/* 3D is only supported on HWv8 and newer hosts */
if (dev_priv->active_display_unit == vmw_du_legacy)
return -ENOSYS;
+ surface = vmw_user_object_surface(uo);
+
/*
* Sanity checks.
*/
@@ -1357,8 +1355,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
}
drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd);
- vfbs->surface = vmw_surface_reference(surface);
- vfbs->is_bo_proxy = is_bo_proxy;
+ memcpy(&vfbs->uo, uo, sizeof(vfbs->uo));
+ vmw_user_object_ref(&vfbs->uo);
*out = &vfbs->base;
@@ -1370,7 +1368,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
return 0;
out_err2:
- vmw_surface_unreference(&surface);
+ vmw_user_object_unref(&vfbs->uo);
kfree(vfbs);
out_err1:
return ret;
@@ -1386,7 +1384,6 @@ static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb,
{
struct vmw_framebuffer_bo *vfbd =
vmw_framebuffer_to_vfbd(fb);
-
return drm_gem_handle_create(file_priv, &vfbd->buffer->tbo.base, handle);
}
@@ -1407,86 +1404,6 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = {
.dirty = drm_atomic_helper_dirtyfb,
};
-/**
- * vmw_create_bo_proxy - create a proxy surface for the buffer object
- *
- * @dev: DRM device
- * @mode_cmd: parameters for the new surface
- * @bo_mob: MOB backing the buffer object
- * @srf_out: newly created surface
- *
- * When the content FB is a buffer object, we create a surface as a proxy to the
- * same buffer. This way we can do a surface copy rather than a surface DMA.
- * This is a more efficient approach
- *
- * RETURNS:
- * 0 on success, error code otherwise
- */
-static int vmw_create_bo_proxy(struct drm_device *dev,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct vmw_bo *bo_mob,
- struct vmw_surface **srf_out)
-{
- struct vmw_surface_metadata metadata = {0};
- uint32_t format;
- struct vmw_resource *res;
- unsigned int bytes_pp;
- int ret;
-
- switch (mode_cmd->pixel_format) {
- case DRM_FORMAT_ARGB8888:
- case DRM_FORMAT_XRGB8888:
- format = SVGA3D_X8R8G8B8;
- bytes_pp = 4;
- break;
-
- case DRM_FORMAT_RGB565:
- case DRM_FORMAT_XRGB1555:
- format = SVGA3D_R5G6B5;
- bytes_pp = 2;
- break;
-
- case 8:
- format = SVGA3D_P8;
- bytes_pp = 1;
- break;
-
- default:
- DRM_ERROR("Invalid framebuffer format %p4cc\n",
- &mode_cmd->pixel_format);
- return -EINVAL;
- }
-
- metadata.format = format;
- metadata.mip_levels[0] = 1;
- metadata.num_sizes = 1;
- metadata.base_size.width = mode_cmd->pitches[0] / bytes_pp;
- metadata.base_size.height = mode_cmd->height;
- metadata.base_size.depth = 1;
- metadata.scanout = true;
-
- ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out);
- if (ret) {
- DRM_ERROR("Failed to allocate proxy content buffer\n");
- return ret;
- }
-
- res = &(*srf_out)->res;
-
- /* Reserve and switch the backing mob. */
- mutex_lock(&res->dev_priv->cmdbuf_mutex);
- (void) vmw_resource_reserve(res, false, true);
- vmw_user_bo_unref(&res->guest_memory_bo);
- res->guest_memory_bo = vmw_user_bo_ref(bo_mob);
- res->guest_memory_offset = 0;
- vmw_resource_unreserve(res, false, false, false, NULL, 0);
- mutex_unlock(&res->dev_priv->cmdbuf_mutex);
-
- return 0;
-}
-
-
-
static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv,
struct vmw_bo *bo,
struct vmw_framebuffer **out,
@@ -1565,55 +1482,24 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height)
* vmw_kms_new_framebuffer - Create a new framebuffer.
*
* @dev_priv: Pointer to device private struct.
- * @bo: Pointer to buffer object to wrap the kms framebuffer around.
- * Either @bo or @surface must be NULL.
- * @surface: Pointer to a surface to wrap the kms framebuffer around.
- * Either @bo or @surface must be NULL.
- * @only_2d: No presents will occur to this buffer object based framebuffer.
- * This helps the code to do some important optimizations.
+ * @uo: Pointer to user object to wrap the kms framebuffer around.
+ * Either the buffer or surface inside the user object must be NULL.
* @mode_cmd: Frame-buffer metadata.
*/
struct vmw_framebuffer *
vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
- struct vmw_bo *bo,
- struct vmw_surface *surface,
- bool only_2d,
+ struct vmw_user_object *uo,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
struct vmw_framebuffer *vfb = NULL;
- bool is_bo_proxy = false;
int ret;
- /*
- * We cannot use the SurfaceDMA command in an non-accelerated VM,
- * therefore, wrap the buffer object in a surface so we can use the
- * SurfaceCopy command.
- */
- if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height) &&
- bo && only_2d &&
- mode_cmd->width > 64 && /* Don't create a proxy for cursor */
- dev_priv->active_display_unit == vmw_du_screen_target) {
- ret = vmw_create_bo_proxy(&dev_priv->drm, mode_cmd,
- bo, &surface);
- if (ret)
- return ERR_PTR(ret);
-
- is_bo_proxy = true;
- }
-
/* Create the new framebuffer depending one what we have */
- if (surface) {
- ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
- mode_cmd,
- is_bo_proxy);
- /*
- * vmw_create_bo_proxy() adds a reference that is no longer
- * needed
- */
- if (is_bo_proxy)
- vmw_surface_unreference(&surface);
- } else if (bo) {
- ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb,
+ if (vmw_user_object_surface(uo)) {
+ ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb,
+ mode_cmd);
+ } else if (uo->buffer) {
+ ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb,
mode_cmd);
} else {
BUG();
@@ -1635,14 +1521,12 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
{
struct vmw_private *dev_priv = vmw_priv(dev);
struct vmw_framebuffer *vfb = NULL;
- struct vmw_surface *surface = NULL;
- struct vmw_bo *bo = NULL;
+ struct vmw_user_object uo = {0};
int ret;
/* returns either a bo or surface */
- ret = vmw_user_lookup_handle(dev_priv, file_priv,
- mode_cmd->handles[0],
- &surface, &bo);
+ ret = vmw_user_object_lookup(dev_priv, file_priv, mode_cmd->handles[0],
+ &uo);
if (ret) {
DRM_ERROR("Invalid buffer object handle %u (0x%x).\n",
mode_cmd->handles[0], mode_cmd->handles[0]);
@@ -1650,7 +1534,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
}
- if (!bo &&
+ if (vmw_user_object_surface(&uo) &&
!vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) {
DRM_ERROR("Surface size cannot exceed %dx%d\n",
dev_priv->texture_max_width,
@@ -1659,20 +1543,15 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
}
- vfb = vmw_kms_new_framebuffer(dev_priv, bo, surface,
- !(dev_priv->capabilities & SVGA_CAP_3D),
- mode_cmd);
+ vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd);
if (IS_ERR(vfb)) {
ret = PTR_ERR(vfb);
goto err_out;
}
err_out:
- /* vmw_user_lookup_handle takes one ref so does new_fb */
- if (bo)
- vmw_user_bo_unref(&bo);
- if (surface)
- vmw_surface_unreference(&surface);
+ /* vmw_user_object_lookup takes one ref so does new_fb */
+ vmw_user_object_unref(&uo);
if (ret) {
DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
@@ -2585,72 +2464,6 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
}
/**
- * vmw_kms_update_proxy - Helper function to update a proxy surface from
- * its backing MOB.
- *
- * @res: Pointer to the surface resource
- * @clips: Clip rects in framebuffer (surface) space.
- * @num_clips: Number of clips in @clips.
- * @increment: Integer with which to increment the clip counter when looping.
- * Used to skip a predetermined number of clip rects.
- *
- * This function makes sure the proxy surface is updated from its backing MOB
- * using the region given by @clips. The surface resource @res and its backing
- * MOB needs to be reserved and validated on call.
- */
-int vmw_kms_update_proxy(struct vmw_resource *res,
- const struct drm_clip_rect *clips,
- unsigned num_clips,
- int increment)
-{
- struct vmw_private *dev_priv = res->dev_priv;
- struct drm_vmw_size *size = &vmw_res_to_srf(res)->metadata.base_size;
- struct {
- SVGA3dCmdHeader header;
- SVGA3dCmdUpdateGBImage body;
- } *cmd;
- SVGA3dBox *box;
- size_t copy_size = 0;
- int i;
-
- if (!clips)
- return 0;
-
- cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd) * num_clips);
- if (!cmd)
- return -ENOMEM;
-
- for (i = 0; i < num_clips; ++i, clips += increment, ++cmd) {
- box = &cmd->body.box;
-
- cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
- cmd->header.size = sizeof(cmd->body);
- cmd->body.image.sid = res->id;
- cmd->body.image.face = 0;
- cmd->body.image.mipmap = 0;
-
- if (clips->x1 > size->width || clips->x2 > size->width ||
- clips->y1 > size->height || clips->y2 > size->height) {
- DRM_ERROR("Invalid clips outsize of framebuffer.\n");
- return -EINVAL;
- }
-
- box->x = clips->x1;
- box->y = clips->y1;
- box->z = 0;
- box->w = clips->x2 - clips->x1;
- box->h = clips->y2 - clips->y1;
- box->d = 1;
-
- copy_size += sizeof(*cmd);
- }
-
- vmw_cmd_commit(dev_priv, copy_size);
-
- return 0;
-}
-
-/**
* vmw_kms_create_implicit_placement_property - Set up the implicit placement
* property.
*
@@ -2784,8 +2597,9 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update)
} else {
struct vmw_framebuffer_surface *vfbs =
container_of(update->vfb, typeof(*vfbs), base);
+ struct vmw_surface *surf = vmw_user_object_surface(&vfbs->uo);
- ret = vmw_validation_add_resource(&val_ctx, &vfbs->surface->res,
+ ret = vmw_validation_add_resource(&val_ctx, &surf->res,
0, VMW_RES_DIRTY_NONE, NULL,
NULL);
}
@@ -2941,3 +2755,93 @@ int vmw_connector_get_modes(struct drm_connector *connector)
return num_modes;
}
+
+struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo)
+{
+ if (uo->buffer)
+ vmw_user_bo_ref(uo->buffer);
+ else if (uo->surface)
+ vmw_surface_reference(uo->surface);
+ return uo;
+}
+
+void vmw_user_object_unref(struct vmw_user_object *uo)
+{
+ if (uo->buffer)
+ vmw_user_bo_unref(&uo->buffer);
+ else if (uo->surface)
+ vmw_surface_unreference(&uo->surface);
+}
+
+struct vmw_bo *
+vmw_user_object_buffer(struct vmw_user_object *uo)
+{
+ if (uo->buffer)
+ return uo->buffer;
+ else if (uo->surface)
+ return uo->surface->res.guest_memory_bo;
+ return NULL;
+}
+
+struct vmw_surface *
+vmw_user_object_surface(struct vmw_user_object *uo)
+{
+ if (uo->buffer)
+ return uo->buffer->dumb_surface;
+ return uo->surface;
+}
+
+void *vmw_user_object_map(struct vmw_user_object *uo)
+{
+ struct vmw_bo *bo = vmw_user_object_buffer(uo);
+
+ WARN_ON(!bo);
+ return vmw_bo_map_and_cache(bo);
+}
+
+void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size)
+{
+ struct vmw_bo *bo = vmw_user_object_buffer(uo);
+
+ WARN_ON(!bo);
+ return vmw_bo_map_and_cache_size(bo, size);
+}
+
+void vmw_user_object_unmap(struct vmw_user_object *uo)
+{
+ struct vmw_bo *bo = vmw_user_object_buffer(uo);
+ int ret;
+
+ WARN_ON(!bo);
+
+ /* Fence the mob creation so we are guarateed to have the mob */
+ ret = ttm_bo_reserve(&bo->tbo, false, false, NULL);
+ if (ret != 0)
+ return;
+
+ vmw_bo_unmap(bo);
+ vmw_bo_pin_reserved(bo, false);
+
+ ttm_bo_unreserve(&bo->tbo);
+}
+
+bool vmw_user_object_is_mapped(struct vmw_user_object *uo)
+{
+ struct vmw_bo *bo;
+
+ if (!uo || vmw_user_object_is_null(uo))
+ return false;
+
+ bo = vmw_user_object_buffer(uo);
+
+ if (WARN_ON(!bo))
+ return false;
+
+ WARN_ON(bo->map.bo && !bo->map.virtual);
+ return bo->map.virtual;
+}
+
+bool vmw_user_object_is_null(struct vmw_user_object *uo)
+{
+ return !uo->buffer && !uo->surface;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index bf24f2f0dcfc..6141fadf81ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -221,11 +222,9 @@ struct vmw_framebuffer {
struct vmw_framebuffer_surface {
struct vmw_framebuffer base;
- struct vmw_surface *surface;
- bool is_bo_proxy; /* true if this is proxy surface for DMA buf */
+ struct vmw_user_object uo;
};
-
struct vmw_framebuffer_bo {
struct vmw_framebuffer base;
struct vmw_bo *buffer;
@@ -277,8 +276,7 @@ struct vmw_cursor_plane_state {
*/
struct vmw_plane_state {
struct drm_plane_state base;
- struct vmw_surface *surf;
- struct vmw_bo *bo;
+ struct vmw_user_object uo;
int content_fb_type;
unsigned long bo_size;
@@ -457,9 +455,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv,
uint32_t num_clips);
struct vmw_framebuffer *
vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
- struct vmw_bo *bo,
- struct vmw_surface *surface,
- bool only_2d,
+ struct vmw_user_object *uo,
const struct drm_mode_fb_cmd2 *mode_cmd);
void vmw_guess_mode_timing(struct drm_display_mode *mode);
void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv);
@@ -486,8 +482,7 @@ void vmw_du_plane_reset(struct drm_plane *plane);
struct drm_plane_state *vmw_du_plane_duplicate_state(struct drm_plane *plane);
void vmw_du_plane_destroy_state(struct drm_plane *plane,
struct drm_plane_state *state);
-void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps,
- bool unreference);
+void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps);
int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 5befc2719a49..39949e0a493f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -147,8 +148,9 @@ static int vmw_ldu_fb_pin(struct vmw_framebuffer *vfb)
struct vmw_bo *buf;
int ret;
- buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
- vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo;
+ buf = vfb->bo ?
+ vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
+ vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo);
if (!buf)
return 0;
@@ -169,8 +171,10 @@ static int vmw_ldu_fb_unpin(struct vmw_framebuffer *vfb)
struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
struct vmw_bo *buf;
- buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
- vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo;
+ buf = vfb->bo ?
+ vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
+ vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo);
+
if (WARN_ON(!buf))
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index c45b4724e414..e20f64b67b26 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -92,7 +92,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv,
{
struct vmw_escape_video_flush *flush;
size_t fifo_size;
- bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object);
+ bool have_so = (dev_priv->active_display_unit != vmw_du_legacy);
int i, num_items;
SVGAGuestPtr ptr;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
index c99cad444991..598b90ac7590 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2013 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2013-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -31,6 +32,7 @@
*/
#include "vmwgfx_drv.h"
+#include "vmwgfx_bo.h"
#include "ttm_object.h"
#include <linux/dma-buf.h>
@@ -88,13 +90,35 @@ int vmw_prime_handle_to_fd(struct drm_device *dev,
uint32_t handle, uint32_t flags,
int *prime_fd)
{
+ struct vmw_private *vmw = vmw_priv(dev);
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ struct vmw_bo *vbo;
int ret;
+ int surf_handle;
- if (handle > VMWGFX_NUM_MOB)
+ if (handle > VMWGFX_NUM_MOB) {
ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
- else
- ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+ } else {
+ ret = vmw_user_bo_lookup(file_priv, handle, &vbo);
+ if (ret)
+ return ret;
+ if (vbo && vbo->is_dumb) {
+ ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle,
+ flags, prime_fd);
+ } else {
+ surf_handle = vmw_lookup_surface_handle_for_buffer(vmw,
+ vbo,
+ handle);
+ if (surf_handle > 0)
+ ret = ttm_prime_handle_to_fd(tfile, surf_handle,
+ flags, prime_fd);
+ else
+ ret = drm_gem_prime_handle_to_fd(dev, file_priv,
+ handle, flags,
+ prime_fd);
+ }
+ vmw_user_bo_unref(&vbo);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 848dba09981b..a73af8a355fb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -58,6 +59,7 @@ void vmw_resource_mob_attach(struct vmw_resource *res)
rb_link_node(&res->mob_node, parent, new);
rb_insert_color(&res->mob_node, &gbo->res_tree);
+ vmw_bo_del_detached_resource(gbo, res);
vmw_bo_prio_add(gbo, res->used_prio);
}
@@ -287,28 +289,35 @@ out_bad_resource:
*
* The pointer this pointed at by out_surf and out_buf needs to be null.
*/
-int vmw_user_lookup_handle(struct vmw_private *dev_priv,
+int vmw_user_object_lookup(struct vmw_private *dev_priv,
struct drm_file *filp,
- uint32_t handle,
- struct vmw_surface **out_surf,
- struct vmw_bo **out_buf)
+ u32 handle,
+ struct vmw_user_object *uo)
{
struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile;
struct vmw_resource *res;
int ret;
- BUG_ON(*out_surf || *out_buf);
+ WARN_ON(uo->surface || uo->buffer);
ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle,
user_surface_converter,
&res);
if (!ret) {
- *out_surf = vmw_res_to_srf(res);
+ uo->surface = vmw_res_to_srf(res);
return 0;
}
- *out_surf = NULL;
- ret = vmw_user_bo_lookup(filp, handle, out_buf);
+ uo->surface = NULL;
+ ret = vmw_user_bo_lookup(filp, handle, &uo->buffer);
+ if (!ret && !uo->buffer->is_dumb) {
+ uo->surface = vmw_lookup_surface_for_buffer(dev_priv,
+ uo->buffer,
+ handle);
+ if (uo->surface)
+ vmw_user_bo_unref(&uo->buffer);
+ }
+
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index df0039a8ef29..0f4bfd98480a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2011-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -240,7 +241,7 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc)
struct vmw_connector_state *vmw_conn_state;
int x, y;
- sou->buffer = vps->bo;
+ sou->buffer = vmw_user_object_buffer(&vps->uo);
conn_state = sou->base.connector.state;
vmw_conn_state = vmw_connector_state_to_vcs(conn_state);
@@ -376,10 +377,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane,
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
struct drm_crtc *crtc = plane->state->crtc ?
plane->state->crtc : old_state->crtc;
+ struct vmw_bo *bo = vmw_user_object_buffer(&vps->uo);
- if (vps->bo)
- vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false);
- vmw_bo_unreference(&vps->bo);
+ if (bo)
+ vmw_bo_unpin(vmw_priv(crtc->dev), bo, false);
+ vmw_user_object_unref(&vps->uo);
vps->bo_size = 0;
vmw_du_plane_cleanup_fb(plane, old_state);
@@ -411,9 +413,10 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
.bo_type = ttm_bo_type_device,
.pin = true
};
+ struct vmw_bo *bo = NULL;
if (!new_fb) {
- vmw_bo_unreference(&vps->bo);
+ vmw_user_object_unref(&vps->uo);
vps->bo_size = 0;
return 0;
@@ -422,17 +425,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
bo_params.size = new_state->crtc_w * new_state->crtc_h * 4;
dev_priv = vmw_priv(crtc->dev);
- if (vps->bo) {
+ bo = vmw_user_object_buffer(&vps->uo);
+ if (bo) {
if (vps->bo_size == bo_params.size) {
/*
* Note that this might temporarily up the pin-count
* to 2, until cleanup_fb() is called.
*/
- return vmw_bo_pin_in_vram(dev_priv, vps->bo,
- true);
+ return vmw_bo_pin_in_vram(dev_priv, bo, true);
}
- vmw_bo_unreference(&vps->bo);
+ vmw_user_object_unref(&vps->uo);
vps->bo_size = 0;
}
@@ -442,7 +445,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
* resume the overlays, this is preferred to failing to alloc.
*/
vmw_overlay_pause_all(dev_priv);
- ret = vmw_bo_create(dev_priv, &bo_params, &vps->bo);
+ ret = vmw_gem_object_create(dev_priv, &bo_params, &vps->uo.buffer);
vmw_overlay_resume_all(dev_priv);
if (ret)
return ret;
@@ -453,7 +456,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
* TTM already thinks the buffer is pinned, but make sure the
* pin_count is upped.
*/
- return vmw_bo_pin_in_vram(dev_priv, vps->bo, true);
+ return vmw_bo_pin_in_vram(dev_priv, vps->uo.buffer, true);
}
static uint32_t vmw_sou_bo_fifo_size(struct vmw_du_update_plane *update,
@@ -580,6 +583,7 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update,
{
struct vmw_kms_sou_dirty_cmd *blit = cmd;
struct vmw_framebuffer_surface *vfbs;
+ struct vmw_surface *surf = NULL;
vfbs = container_of(update->vfb, typeof(*vfbs), base);
@@ -587,7 +591,8 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update,
blit->header.size = sizeof(blit->body) + sizeof(SVGASignedRect) *
num_hits;
- blit->body.srcImage.sid = vfbs->surface->res.id;
+ surf = vmw_user_object_surface(&vfbs->uo);
+ blit->body.srcImage.sid = surf->res.id;
blit->body.destScreenId = update->du->unit;
/* Update the source and destination bounding box later in post_clip */
@@ -1104,7 +1109,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
int ret;
if (!srf)
- srf = &vfbs->surface->res;
+ srf = &vmw_user_object_surface(&vfbs->uo)->res;
ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE,
NULL, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index a04e0736318d..fab155a68054 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/******************************************************************************
*
- * COPYRIGHT (C) 2014-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2014-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -29,6 +30,7 @@
#include "vmwgfx_kms.h"
#include "vmwgfx_vkms.h"
#include "vmw_surface_cache.h"
+#include <linux/fsnotify.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
@@ -500,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
container_of(dirty->unit, typeof(*stdu), base);
s32 width, height;
s32 src_pitch, dst_pitch;
- struct ttm_buffer_object *src_bo, *dst_bo;
+ struct vmw_bo *src_bo, *dst_bo;
u32 src_offset, dst_offset;
struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
@@ -515,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
/* Assume we are blitting from Guest (bo) to Host (display_srf) */
src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
- src_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
+ src_bo = stdu->display_srf->res.guest_memory_bo;
src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp;
dst_pitch = ddirty->pitch;
- dst_bo = &ddirty->buf->tbo;
+ dst_bo = ddirty->buf;
dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
@@ -735,7 +737,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
int ret;
if (!srf)
- srf = &vfbs->surface->res;
+ srf = &vmw_user_object_surface(&vfbs->uo)->res;
ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE,
NULL, NULL);
@@ -746,12 +748,6 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
if (ret)
goto out_unref;
- if (vfbs->is_bo_proxy) {
- ret = vmw_kms_update_proxy(srf, clips, num_clips, inc);
- if (ret)
- goto out_finish;
- }
-
sdirty.base.fifo_commit = vmw_kms_stdu_surface_fifo_commit;
sdirty.base.clip = vmw_kms_stdu_surface_clip;
sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) +
@@ -765,7 +761,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
dest_x, dest_y, num_clips, inc,
&sdirty.base);
-out_finish:
+
vmw_kms_helper_validation_finish(dev_priv, NULL, &val_ctx, out_fence,
NULL);
@@ -877,6 +873,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector,
return MODE_OK;
}
+/*
+ * Trigger a modeset if the X,Y position of the Screen Target changes.
+ * This is needed when multi-mon is cycled. The original Screen Target will have
+ * the same mode but its relative X,Y position in the topology will change.
+ */
+static int vmw_stdu_connector_atomic_check(struct drm_connector *conn,
+ struct drm_atomic_state *state)
+{
+ struct drm_connector_state *conn_state;
+ struct vmw_screen_target_display_unit *du;
+ struct drm_crtc_state *new_crtc_state;
+
+ conn_state = drm_atomic_get_connector_state(state, conn);
+ du = vmw_connector_to_stdu(conn);
+
+ if (!conn_state->crtc)
+ return 0;
+
+ new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
+ if (du->base.gui_x != du->base.set_gui_x ||
+ du->base.gui_y != du->base.set_gui_y)
+ new_crtc_state->mode_changed = true;
+
+ return 0;
+}
+
static const struct drm_connector_funcs vmw_stdu_connector_funcs = {
.dpms = vmw_du_connector_dpms,
.detect = vmw_du_connector_detect,
@@ -891,7 +913,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = {
static const struct
drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = {
.get_modes = vmw_connector_get_modes,
- .mode_valid = vmw_stdu_connector_mode_valid
+ .mode_valid = vmw_stdu_connector_mode_valid,
+ .atomic_check = vmw_stdu_connector_atomic_check,
};
@@ -918,9 +941,8 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
{
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
- if (vps->surf)
+ if (vmw_user_object_surface(&vps->uo))
WARN_ON(!vps->pinned);
-
vmw_du_plane_cleanup_fb(plane, old_state);
vps->content_fb_type = SAME_AS_DISPLAY;
@@ -928,7 +950,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
}
-
/**
* vmw_stdu_primary_plane_prepare_fb - Readies the display surface
*
@@ -952,13 +973,15 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
enum stdu_content_type new_content_type;
struct vmw_framebuffer_surface *new_vfbs;
uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h;
+ struct drm_plane_state *old_state = plane->state;
+ struct drm_rect rect;
int ret;
/* No FB to prepare */
if (!new_fb) {
- if (vps->surf) {
+ if (vmw_user_object_surface(&vps->uo)) {
WARN_ON(vps->pinned != 0);
- vmw_surface_unreference(&vps->surf);
+ vmw_user_object_unref(&vps->uo);
}
return 0;
@@ -968,8 +991,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
if (new_vfbs &&
- new_vfbs->surface->metadata.base_size.width == hdisplay &&
- new_vfbs->surface->metadata.base_size.height == vdisplay)
+ vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.width == hdisplay &&
+ vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.height == vdisplay)
new_content_type = SAME_AS_DISPLAY;
else if (vfb->bo)
new_content_type = SEPARATE_BO;
@@ -1007,29 +1030,29 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
metadata.num_sizes = 1;
metadata.scanout = true;
} else {
- metadata = new_vfbs->surface->metadata;
+ metadata = vmw_user_object_surface(&new_vfbs->uo)->metadata;
}
metadata.base_size.width = hdisplay;
metadata.base_size.height = vdisplay;
metadata.base_size.depth = 1;
- if (vps->surf) {
+ if (vmw_user_object_surface(&vps->uo)) {
struct drm_vmw_size cur_base_size =
- vps->surf->metadata.base_size;
+ vmw_user_object_surface(&vps->uo)->metadata.base_size;
if (cur_base_size.width != metadata.base_size.width ||
cur_base_size.height != metadata.base_size.height ||
- vps->surf->metadata.format != metadata.format) {
+ vmw_user_object_surface(&vps->uo)->metadata.format != metadata.format) {
WARN_ON(vps->pinned != 0);
- vmw_surface_unreference(&vps->surf);
+ vmw_user_object_unref(&vps->uo);
}
}
- if (!vps->surf) {
+ if (!vmw_user_object_surface(&vps->uo)) {
ret = vmw_gb_surface_define(dev_priv, &metadata,
- &vps->surf);
+ &vps->uo.surface);
if (ret != 0) {
DRM_ERROR("Couldn't allocate STDU surface.\n");
return ret;
@@ -1042,18 +1065,19 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
* The only time we add a reference in prepare_fb is if the
* state object doesn't have a reference to begin with
*/
- if (vps->surf) {
+ if (vmw_user_object_surface(&vps->uo)) {
WARN_ON(vps->pinned != 0);
- vmw_surface_unreference(&vps->surf);
+ vmw_user_object_unref(&vps->uo);
}
- vps->surf = vmw_surface_reference(new_vfbs->surface);
+ memcpy(&vps->uo, &new_vfbs->uo, sizeof(vps->uo));
+ vmw_user_object_ref(&vps->uo);
}
- if (vps->surf) {
+ if (vmw_user_object_surface(&vps->uo)) {
/* Pin new surface before flipping */
- ret = vmw_resource_pin(&vps->surf->res, false);
+ ret = vmw_resource_pin(&vmw_user_object_surface(&vps->uo)->res, false);
if (ret)
goto out_srf_unref;
@@ -1063,6 +1087,34 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
vps->content_fb_type = new_content_type;
/*
+ * The drm fb code will do blit's via the vmap interface, which doesn't
+ * trigger vmw_bo page dirty tracking due to being kernel side (and thus
+ * doesn't require mmap'ing) so we have to update the surface's dirty
+ * regions by hand but we want to be careful to not overwrite the
+ * resource if it has been written to by the gpu (res_dirty).
+ */
+ if (vps->uo.buffer && vps->uo.buffer->is_dumb) {
+ struct vmw_surface *surf = vmw_user_object_surface(&vps->uo);
+ struct vmw_resource *res = &surf->res;
+
+ if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state,
+ new_state,
+ &rect)) {
+ /*
+ * At some point it might be useful to actually translate
+ * (rect.x1, rect.y1) => start, and (rect.x2, rect.y2) => end,
+ * but currently the fb code will just report the entire fb
+ * dirty so in practice it doesn't matter.
+ */
+ pgoff_t start = res->guest_memory_offset >> PAGE_SHIFT;
+ pgoff_t end = __KERNEL_DIV_ROUND_UP(res->guest_memory_offset +
+ res->guest_memory_size,
+ PAGE_SIZE);
+ vmw_resource_dirty_update(res, start, end);
+ }
+ }
+
+ /*
* This should only happen if the buffer object is too large to create a
* proxy surface for.
*/
@@ -1072,7 +1124,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
return 0;
out_srf_unref:
- vmw_surface_unreference(&vps->surf);
+ vmw_user_object_unref(&vps->uo);
return ret;
}
@@ -1118,7 +1170,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0);
struct vmw_stdu_update_gb_image *cmd_img = cmd;
struct vmw_stdu_update *cmd_update;
- struct ttm_buffer_object *src_bo, *dst_bo;
+ struct vmw_bo *src_bo, *dst_bo;
u32 src_offset, dst_offset;
s32 src_pitch, dst_pitch;
s32 width, height;
@@ -1132,11 +1184,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
diff.cpp = stdu->cpp;
- dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
+ dst_bo = stdu->display_srf->res.guest_memory_bo;
dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp;
- src_bo = &vfbbo->buffer->tbo;
+ src_bo = vfbbo->buffer;
src_pitch = update->vfb->base.pitches[0];
src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left *
stdu->cpp;
@@ -1214,14 +1266,8 @@ static uint32_t
vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update,
uint32_t num_hits)
{
- struct vmw_framebuffer_surface *vfbs;
uint32_t size = 0;
- vfbs = container_of(update->vfb, typeof(*vfbs), base);
-
- if (vfbs->is_bo_proxy)
- size += sizeof(struct vmw_stdu_update_gb_image) * num_hits;
-
size += sizeof(struct vmw_stdu_update);
return size;
@@ -1230,14 +1276,8 @@ vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update,
static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update,
uint32_t num_hits)
{
- struct vmw_framebuffer_surface *vfbs;
uint32_t size = 0;
- vfbs = container_of(update->vfb, typeof(*vfbs), base);
-
- if (vfbs->is_bo_proxy)
- size += sizeof(struct vmw_stdu_update_gb_image) * num_hits;
-
size += sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) *
num_hits + sizeof(struct vmw_stdu_update);
@@ -1245,47 +1285,6 @@ static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update,
}
static uint32_t
-vmw_stdu_surface_update_proxy(struct vmw_du_update_plane *update, void *cmd)
-{
- struct vmw_framebuffer_surface *vfbs;
- struct drm_plane_state *state = update->plane->state;
- struct drm_plane_state *old_state = update->old_state;
- struct vmw_stdu_update_gb_image *cmd_update = cmd;
- struct drm_atomic_helper_damage_iter iter;
- struct drm_rect clip;
- uint32_t copy_size = 0;
-
- vfbs = container_of(update->vfb, typeof(*vfbs), base);
-
- /*
- * proxy surface is special where a buffer object type fb is wrapped
- * in a surface and need an update gb image command to sync with device.
- */
- drm_atomic_helper_damage_iter_init(&iter, old_state, state);
- drm_atomic_for_each_plane_damage(&iter, &clip) {
- SVGA3dBox *box = &cmd_update->body.box;
-
- cmd_update->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
- cmd_update->header.size = sizeof(cmd_update->body);
- cmd_update->body.image.sid = vfbs->surface->res.id;
- cmd_update->body.image.face = 0;
- cmd_update->body.image.mipmap = 0;
-
- box->x = clip.x1;
- box->y = clip.y1;
- box->z = 0;
- box->w = drm_rect_width(&clip);
- box->h = drm_rect_height(&clip);
- box->d = 1;
-
- copy_size += sizeof(*cmd_update);
- cmd_update++;
- }
-
- return copy_size;
-}
-
-static uint32_t
vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd,
uint32_t num_hits)
{
@@ -1299,7 +1298,7 @@ vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd,
cmd_copy->header.id = SVGA_3D_CMD_SURFACE_COPY;
cmd_copy->header.size = sizeof(cmd_copy->body) + sizeof(SVGA3dCopyBox) *
num_hits;
- cmd_copy->body.src.sid = vfbs->surface->res.id;
+ cmd_copy->body.src.sid = vmw_user_object_surface(&vfbs->uo)->res.id;
cmd_copy->body.dest.sid = stdu->display_srf->res.id;
return sizeof(*cmd_copy);
@@ -1370,10 +1369,7 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv,
srf_update.mutex = &dev_priv->cmdbuf_mutex;
srf_update.intr = true;
- if (vfbs->is_bo_proxy)
- srf_update.post_prepare = vmw_stdu_surface_update_proxy;
-
- if (vfbs->surface->res.id != stdu->display_srf->res.id) {
+ if (vmw_user_object_surface(&vfbs->uo)->res.id != stdu->display_srf->res.id) {
srf_update.calc_fifo_size = vmw_stdu_surface_fifo_size;
srf_update.pre_clip = vmw_stdu_surface_populate_copy;
srf_update.clip = vmw_stdu_surface_populate_clip;
@@ -1417,7 +1413,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
stdu = vmw_crtc_to_stdu(crtc);
dev_priv = vmw_priv(crtc->dev);
- stdu->display_srf = vps->surf;
+ stdu->display_srf = vmw_user_object_surface(&vps->uo);
stdu->content_fb_type = vps->content_fb_type;
stdu->cpp = vps->cpp;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index e7a744dfcecf..1625b30d9970 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
*
- * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term
+ * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -36,9 +37,6 @@
#include <drm/ttm/ttm_placement.h>
#define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32)
-#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32)
-#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \
- (svga3d_flags & ((uint64_t)U32_MAX))
/**
* struct vmw_user_surface - User-space visible surface resource
@@ -686,6 +684,14 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
struct vmw_resource *res = &user_srf->srf.res;
*p_base = NULL;
+
+ /*
+ * Dumb buffers own the resource and they'll unref the
+ * resource themselves
+ */
+ if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb)
+ return;
+
vmw_resource_unreference(&res);
}
@@ -812,7 +818,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
}
}
res->guest_memory_size = cur_bo_offset;
- if (metadata->scanout &&
+ if (!file_priv->atomic &&
+ metadata->scanout &&
metadata->num_sizes == 1 &&
metadata->sizes[0].width == VMW_CURSOR_SNOOP_WIDTH &&
metadata->sizes[0].height == VMW_CURSOR_SNOOP_HEIGHT &&
@@ -864,6 +871,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
vmw_resource_unreference(&res);
goto out_unlock;
}
+ vmw_bo_add_detached_resource(res->guest_memory_bo, res);
}
tmp = vmw_resource_reference(&srf->res);
@@ -892,6 +900,113 @@ out_unlock:
return ret;
}
+static struct vmw_user_surface *
+vmw_lookup_user_surface_for_buffer(struct vmw_private *vmw, struct vmw_bo *bo,
+ u32 handle)
+{
+ struct vmw_user_surface *user_srf = NULL;
+ struct vmw_surface *surf;
+ struct ttm_base_object *base;
+
+ surf = vmw_bo_surface(bo);
+ if (surf) {
+ rcu_read_lock();
+ user_srf = container_of(surf, struct vmw_user_surface, srf);
+ base = &user_srf->prime.base;
+ if (base && !kref_get_unless_zero(&base->refcount)) {
+ drm_dbg_driver(&vmw->drm,
+ "%s: referencing a stale surface handle %d\n",
+ __func__, handle);
+ base = NULL;
+ user_srf = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return user_srf;
+}
+
+struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw,
+ struct vmw_bo *bo,
+ u32 handle)
+{
+ struct vmw_user_surface *user_srf =
+ vmw_lookup_user_surface_for_buffer(vmw, bo, handle);
+ struct vmw_surface *surf = NULL;
+ struct ttm_base_object *base;
+
+ if (user_srf) {
+ surf = vmw_surface_reference(&user_srf->srf);
+ base = &user_srf->prime.base;
+ ttm_base_object_unref(&base);
+ }
+ return surf;
+}
+
+u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw,
+ struct vmw_bo *bo,
+ u32 handle)
+{
+ struct vmw_user_surface *user_srf =
+ vmw_lookup_user_surface_for_buffer(vmw, bo, handle);
+ int surf_handle = 0;
+ struct ttm_base_object *base;
+
+ if (user_srf) {
+ base = &user_srf->prime.base;
+ surf_handle = (u32)base->handle;
+ ttm_base_object_unref(&base);
+ }
+ return surf_handle;
+}
+
+static int vmw_buffer_prime_to_surface_base(struct vmw_private *dev_priv,
+ struct drm_file *file_priv,
+ u32 fd, u32 *handle,
+ struct ttm_base_object **base_p)
+{
+ struct ttm_base_object *base;
+ struct vmw_bo *bo;
+ struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ struct vmw_user_surface *user_srf;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&dev_priv->drm, file_priv, fd, handle);
+ if (ret) {
+ drm_warn(&dev_priv->drm,
+ "Wasn't able to find user buffer for fd = %u.\n", fd);
+ return ret;
+ }
+
+ ret = vmw_user_bo_lookup(file_priv, *handle, &bo);
+ if (ret) {
+ drm_warn(&dev_priv->drm,
+ "Wasn't able to lookup user buffer for handle = %u.\n", *handle);
+ return ret;
+ }
+
+ user_srf = vmw_lookup_user_surface_for_buffer(dev_priv, bo, *handle);
+ if (WARN_ON(!user_srf)) {
+ drm_warn(&dev_priv->drm,
+ "User surface fd %d (handle %d) is null.\n", fd, *handle);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ base = &user_srf->prime.base;
+ ret = ttm_ref_object_add(tfile, base, NULL, false);
+ if (ret) {
+ drm_warn(&dev_priv->drm,
+ "Couldn't add an object ref for the buffer (%d).\n", *handle);
+ goto out;
+ }
+
+ *base_p = base;
+out:
+ vmw_user_bo_unref(&bo);
+
+ return ret;
+}
static int
vmw_surface_handle_reference(struct vmw_private *dev_priv,
@@ -901,15 +1016,19 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
struct ttm_base_object **base_p)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
- struct vmw_user_surface *user_srf;
+ struct vmw_user_surface *user_srf = NULL;
uint32_t handle;
struct ttm_base_object *base;
int ret;
if (handle_type == DRM_VMW_HANDLE_PRIME) {
ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle);
- if (unlikely(ret != 0))
- return ret;
+ if (ret)
+ return vmw_buffer_prime_to_surface_base(dev_priv,
+ file_priv,
+ u_handle,
+ &handle,
+ base_p);
} else {
handle = u_handle;
}
@@ -1503,7 +1622,12 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle,
&res->guest_memory_bo);
if (ret == 0) {
- if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) {
+ if (res->guest_memory_bo->is_dumb) {
+ VMW_DEBUG_USER("Can't backup surface with a dumb buffer.\n");
+ vmw_user_bo_unref(&res->guest_memory_bo);
+ ret = -EINVAL;
+ goto out_unlock;
+ } else if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) {
VMW_DEBUG_USER("Surface backup buffer too small.\n");
vmw_user_bo_unref(&res->guest_memory_bo);
ret = -EINVAL;
@@ -1560,6 +1684,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
rep->handle = user_srf->prime.base.handle;
rep->backup_size = res->guest_memory_size;
if (res->guest_memory_bo) {
+ vmw_bo_add_detached_resource(res->guest_memory_bo, res);
rep->buffer_map_handle =
drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node);
rep->buffer_size = res->guest_memory_bo->tbo.base.size;
@@ -2100,3 +2225,142 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv,
out_unlock:
return ret;
}
+
+static SVGA3dSurfaceFormat vmw_format_bpp_to_svga(struct vmw_private *vmw,
+ int bpp)
+{
+ switch (bpp) {
+ case 8: /* DRM_FORMAT_C8 */
+ return SVGA3D_P8;
+ case 16: /* DRM_FORMAT_RGB565 */
+ return SVGA3D_R5G6B5;
+ case 32: /* DRM_FORMAT_XRGB8888 */
+ if (has_sm4_context(vmw))
+ return SVGA3D_B8G8R8X8_UNORM;
+ return SVGA3D_X8R8G8B8;
+ default:
+ drm_warn(&vmw->drm, "Unsupported format bpp: %d\n", bpp);
+ return SVGA3D_X8R8G8B8;
+ }
+}
+
+/**
+ * vmw_dumb_create - Create a dumb kms buffer
+ *
+ * @file_priv: Pointer to a struct drm_file identifying the caller.
+ * @dev: Pointer to the drm device.
+ * @args: Pointer to a struct drm_mode_create_dumb structure
+ * Return: Zero on success, negative error code on failure.
+ *
+ * This is a driver callback for the core drm create_dumb functionality.
+ * Note that this is very similar to the vmw_bo_alloc ioctl, except
+ * that the arguments have a different format.
+ */
+int vmw_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct vmw_private *dev_priv = vmw_priv(dev);
+ struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ struct vmw_bo *vbo = NULL;
+ struct vmw_resource *res = NULL;
+ union drm_vmw_gb_surface_create_ext_arg arg = { 0 };
+ struct drm_vmw_gb_surface_create_ext_req *req = &arg.req;
+ int ret;
+ struct drm_vmw_size drm_size = {
+ .width = args->width,
+ .height = args->height,
+ .depth = 1,
+ };
+ SVGA3dSurfaceFormat format = vmw_format_bpp_to_svga(dev_priv, args->bpp);
+ const struct SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format);
+ SVGA3dSurfaceAllFlags flags = SVGA3D_SURFACE_HINT_TEXTURE |
+ SVGA3D_SURFACE_HINT_RENDERTARGET |
+ SVGA3D_SURFACE_SCREENTARGET |
+ SVGA3D_SURFACE_BIND_SHADER_RESOURCE |
+ SVGA3D_SURFACE_BIND_RENDER_TARGET;
+
+ /*
+ * Without mob support we're just going to use raw memory buffer
+ * because we wouldn't be able to support full surface coherency
+ * without mobs. There also no reason to support surface coherency
+ * without 3d (i.e. gpu usage on the host) because then all the
+ * contents is going to be rendered guest side.
+ */
+ if (!dev_priv->has_mob || !vmw_supports_3d(dev_priv)) {
+ int cpp = DIV_ROUND_UP(args->bpp, 8);
+
+ switch (cpp) {
+ case 1: /* DRM_FORMAT_C8 */
+ case 2: /* DRM_FORMAT_RGB565 */
+ case 4: /* DRM_FORMAT_XRGB8888 */
+ break;
+ default:
+ /*
+ * Dumb buffers don't allow anything else.
+ * This is tested via IGT's dumb_buffers
+ */
+ return -EINVAL;
+ }
+
+ args->pitch = args->width * cpp;
+ args->size = ALIGN(args->pitch * args->height, PAGE_SIZE);
+
+ ret = vmw_gem_object_create_with_handle(dev_priv, file_priv,
+ args->size, &args->handle,
+ &vbo);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(&vbo->tbo.base);
+ return ret;
+ }
+
+ req->version = drm_vmw_gb_surface_v1;
+ req->multisample_pattern = SVGA3D_MS_PATTERN_NONE;
+ req->quality_level = SVGA3D_MS_QUALITY_NONE;
+ req->buffer_byte_stride = 0;
+ req->must_be_zero = 0;
+ req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags);
+ req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags);
+ req->base.format = (uint32_t)format;
+ req->base.drm_surface_flags = drm_vmw_surface_flag_scanout;
+ req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable;
+ req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer;
+ req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent;
+ req->base.base_size.width = args->width;
+ req->base.base_size.height = args->height;
+ req->base.base_size.depth = 1;
+ req->base.array_size = 0;
+ req->base.mip_levels = 1;
+ req->base.multisample_count = 0;
+ req->base.buffer_handle = SVGA3D_INVALID_ID;
+ req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE;
+ ret = vmw_gb_surface_define_ext_ioctl(dev, &arg, file_priv);
+ if (ret) {
+ drm_warn(dev, "Unable to create a dumb buffer\n");
+ return ret;
+ }
+
+ args->handle = arg.rep.buffer_handle;
+ args->size = arg.rep.buffer_size;
+ args->pitch = vmw_surface_calculate_pitch(desc, &drm_size);
+
+ ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg.rep.handle,
+ user_surface_converter,
+ &res);
+ if (ret) {
+ drm_err(dev, "Created resource handle doesn't exist!\n");
+ goto err;
+ }
+
+ vbo = res->guest_memory_bo;
+ vbo->is_dumb = true;
+ vbo->dumb_surface = vmw_res_to_srf(res);
+
+err:
+ if (res)
+ vmw_resource_unreference(&res);
+ if (ret)
+ ttm_ref_object_base_unref(tfile, arg.rep.handle);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c
index 3bfcf671fcd5..8651b788e98b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c
@@ -75,7 +75,7 @@ done:
return ret;
}
-static int
+static void
compute_crc(struct drm_crtc *crtc,
struct vmw_surface *surf,
u32 *crc)
@@ -101,8 +101,6 @@ compute_crc(struct drm_crtc *crtc,
}
vmw_bo_unmap(bo);
-
- return 0;
}
static void
@@ -116,7 +114,6 @@ crc_generate_worker(struct work_struct *work)
u64 frame_start, frame_end;
u32 crc32 = 0;
struct vmw_surface *surf = 0;
- int ret;
spin_lock_irq(&du->vkms.crc_state_lock);
crc_pending = du->vkms.crc_pending;
@@ -130,22 +127,24 @@ crc_generate_worker(struct work_struct *work)
return;
spin_lock_irq(&du->vkms.crc_state_lock);
- surf = du->vkms.surface;
+ surf = vmw_surface_reference(du->vkms.surface);
spin_unlock_irq(&du->vkms.crc_state_lock);
- if (vmw_surface_sync(vmw, surf)) {
- drm_warn(crtc->dev, "CRC worker wasn't able to sync the crc surface!\n");
- return;
- }
+ if (surf) {
+ if (vmw_surface_sync(vmw, surf)) {
+ drm_warn(
+ crtc->dev,
+ "CRC worker wasn't able to sync the crc surface!\n");
+ return;
+ }
- ret = compute_crc(crtc, surf, &crc32);
- if (ret)
- return;
+ compute_crc(crtc, surf, &crc32);
+ vmw_surface_unreference(&surf);
+ }
spin_lock_irq(&du->vkms.crc_state_lock);
frame_start = du->vkms.frame_start;
frame_end = du->vkms.frame_end;
- crc_pending = du->vkms.crc_pending;
du->vkms.frame_start = 0;
du->vkms.frame_end = 0;
du->vkms.crc_pending = false;
@@ -164,7 +163,7 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer)
struct vmw_display_unit *du = container_of(timer, struct vmw_display_unit, vkms.timer);
struct drm_crtc *crtc = &du->crtc;
struct vmw_private *vmw = vmw_priv(crtc->dev);
- struct vmw_surface *surf = NULL;
+ bool has_surface = false;
u64 ret_overrun;
bool locked, ret;
@@ -179,10 +178,10 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer)
WARN_ON(!ret);
if (!locked)
return HRTIMER_RESTART;
- surf = du->vkms.surface;
+ has_surface = du->vkms.surface != NULL;
vmw_vkms_unlock(crtc);
- if (du->vkms.crc_enabled && surf) {
+ if (du->vkms.crc_enabled && has_surface) {
u64 frame = drm_crtc_accurate_vblank_count(crtc);
spin_lock(&du->vkms.crc_state_lock);
@@ -336,6 +335,8 @@ vmw_vkms_crtc_cleanup(struct drm_crtc *crtc)
{
struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+ if (du->vkms.surface)
+ vmw_surface_unreference(&du->vkms.surface);
WARN_ON(work_pending(&du->vkms.crc_generator_work));
hrtimer_cancel(&du->vkms.timer);
}
@@ -497,9 +498,12 @@ vmw_vkms_set_crc_surface(struct drm_crtc *crtc,
struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
struct vmw_private *vmw = vmw_priv(crtc->dev);
- if (vmw->vkms_enabled) {
+ if (vmw->vkms_enabled && du->vkms.surface != surf) {
WARN_ON(atomic_read(&du->vkms.atomic_lock) != VMW_VKMS_LOCK_MODESET);
- du->vkms.surface = surf;
+ if (du->vkms.surface)
+ vmw_surface_unreference(&du->vkms.surface);
+ if (surf)
+ du->vkms.surface = vmw_surface_reference(surf);
}
}
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 628c245c4822..e97c9da451b3 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
uses_generated_oob := \
$(obj)/xe_ggtt.o \
+ $(obj)/xe_device.o \
$(obj)/xe_gsc.o \
$(obj)/xe_gt.o \
$(obj)/xe_guc.o \
$(obj)/xe_guc_ads.o \
$(obj)/xe_guc_pc.o \
$(obj)/xe_migrate.o \
+ $(obj)/xe_pat.o \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
$(obj)/xe_wa.o \
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 8b83dcff72e1..49de4e4f8a75 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg)
return;
intel_display_driver_remove_noirq(xe);
+ intel_opregion_cleanup(xe);
}
int xe_display_init_noirq(struct xe_device *xe)
@@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe)
intel_display_device_info_runtime_init(xe);
err = intel_display_driver_probe_noirq(xe);
- if (err)
+ if (err) {
+ intel_opregion_cleanup(xe);
return err;
+ }
return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe);
}
@@ -280,6 +283,27 @@ static bool suspend_to_idle(void)
return false;
}
+static void xe_display_flush_cleanup_work(struct xe_device *xe)
+{
+ struct intel_crtc *crtc;
+
+ for_each_intel_crtc(&xe->drm, crtc) {
+ struct drm_crtc_commit *commit;
+
+ spin_lock(&crtc->base.commit_lock);
+ commit = list_first_entry_or_null(&crtc->base.commit_list,
+ struct drm_crtc_commit, commit_entry);
+ if (commit)
+ drm_crtc_commit_get(commit);
+ spin_unlock(&crtc->base.commit_lock);
+
+ if (commit) {
+ wait_for_completion(&commit->cleanup_done);
+ drm_crtc_commit_put(commit);
+ }
+ }
+}
+
void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
bool s2idle = suspend_to_idle();
@@ -297,6 +321,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
if (!runtime)
intel_display_driver_suspend(xe);
+ xe_display_flush_cleanup_work(xe);
+
intel_dp_mst_suspend(xe);
intel_hpd_cancel_work(xe);
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 9e860c61f4b3..ccd0d87d438a 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
#include "xe_gt.h"
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
+ xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
+ xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 423f367c7065..d7db44e79eaf 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
+#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return vma;
err_unpin:
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index d44564bad009..3c2865040058 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -80,6 +80,9 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
+#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
+#define CG_DIS_CNTLBUS REG_BIT(6)
+
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
@@ -372,6 +375,11 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+#define XE2_GLOBAL_INVAL XE_REG(0xb404)
+
+#define SCRATCH1LPFC XE_REG(0xb474)
+#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
+
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
@@ -429,6 +437,7 @@
#define DIS_FIX_EOT1_FLUSH REG_BIT(9)
#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
+#define STK_ID_RESTRICT REG_BIT(12)
#define SLM_WMTP_RESTORE REG_BIT(11)
#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 31192d983d9e..261d3d6c8a93 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
return bo;
}
-static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
+static void __xe_bo_unpin_map_no_vm(void *arg)
{
xe_bo_unpin_map_no_vm(arg);
}
@@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
if (IS_ERR(bo))
return bo;
- ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
+ ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
if (ret)
return ERR_PTR(ret);
@@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
if (IS_ERR(bo))
return PTR_ERR(bo);
- drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
+ devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
*src = bo;
return 0;
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 76109415eba6..c89deffffb6d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
+#include "xe_wa.h"
+
+#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
@@ -87,9 +90,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
spin_unlock(&xe->clients.lock);
file->driver_priv = xef;
+ kref_init(&xef->refcount);
+
return 0;
}
+static void xe_file_destroy(struct kref *ref)
+{
+ struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ struct xe_device *xe = xef->xe;
+
+ xa_destroy(&xef->exec_queue.xa);
+ mutex_destroy(&xef->exec_queue.lock);
+ xa_destroy(&xef->vm.xa);
+ mutex_destroy(&xef->vm.lock);
+
+ spin_lock(&xe->clients.lock);
+ xe->clients.count--;
+ spin_unlock(&xe->clients.lock);
+
+ xe_drm_client_put(xef->client);
+ kfree(xef);
+}
+
+/**
+ * xe_file_get() - Take a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Anyone with a pointer to xef must take a reference to the xe file
+ * object using this call.
+ *
+ * Return: xe file pointer
+ */
+struct xe_file *xe_file_get(struct xe_file *xef)
+{
+ kref_get(&xef->refcount);
+ return xef;
+}
+
+/**
+ * xe_file_put() - Drop a reference to the xe file object
+ * @xef: Pointer to the xe file
+ *
+ * Used to drop reference to the xef object
+ */
+void xe_file_put(struct xe_file *xef)
+{
+ kref_put(&xef->refcount, xe_file_destroy);
+}
+
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -98,6 +147,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
struct xe_exec_queue *q;
unsigned long idx;
+ xe_pm_runtime_get(xe);
+
/*
* No need for exec_queue.lock here as there is no contention for it
* when FD is closing as IOCTLs presumably can't be modifying the
@@ -108,21 +159,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
- xa_destroy(&xef->exec_queue.xa);
- mutex_destroy(&xef->exec_queue.lock);
mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
mutex_unlock(&xef->vm.lock);
- xa_destroy(&xef->vm.xa);
- mutex_destroy(&xef->vm.lock);
- spin_lock(&xe->clients.lock);
- xe->clients.count--;
- spin_unlock(&xe->clients.lock);
+ xe_file_put(xef);
- xe_drm_client_put(xef->client);
- kfree(xef);
+ xe_pm_runtime_put(xe);
}
static const struct drm_ioctl_desc xe_ioctls[] = {
@@ -779,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
+ if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+ xe_device_l2_flush(xe);
+ return;
+ }
+
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -802,6 +851,30 @@ void xe_device_td_flush(struct xe_device *xe)
}
}
+void xe_device_l2_flush(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+
+ gt = xe_root_mmio_gt(xe);
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ spin_lock(&gt->global_invl_lock);
+ xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
+
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+ xe_gt_err_once(gt, "Global invalidation timeout\n");
+ spin_unlock(&gt->global_invl_lock);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index bb07f5669dbb..533ccfb2567a 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
+void xe_device_l2_flush(struct xe_device *xe);
static inline bool xe_device_wedged(struct xe_device *xe)
{
@@ -170,4 +171,7 @@ static inline bool xe_device_wedged(struct xe_device *xe)
void xe_device_declare_wedged(struct xe_device *xe);
+struct xe_file *xe_file_get(struct xe_file *xef);
+void xe_file_put(struct xe_file *xef);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 3bca6d344744..cbc582bcc90a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -566,6 +566,9 @@ struct xe_file {
/** @client: drm client */
struct xe_drm_client *client;
+
+ /** @refcount: ref count of this xe file */
+ struct kref refcount;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 6a26923fa10e..7ddd59908334 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
+ xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
- xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
- q->old_run_ticks = q->run_ticks;
- }
mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0ba37835849b..9731dcd0b1bd 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
if (q->vm)
xe_vm_put(q->vm);
+
+ if (q->xef)
+ xe_file_put(q->xef);
+
kfree(q);
}
@@ -101,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
+ struct xe_vm *vm = q->vm;
int i, err;
+ if (vm) {
+ err = xe_vm_lock(vm, true);
+ if (err)
+ return err;
+ }
+
for (i = 0; i < q->width; ++i) {
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
- goto err_lrc;
+ goto err_unlock;
}
}
+ if (vm)
+ xe_vm_unlock(vm);
+
err = q->ops->init(q);
if (err)
goto err_lrc;
return 0;
+err_unlock:
+ if (vm)
+ xe_vm_unlock(vm);
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_put(q->lrc[i]);
@@ -136,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (IS_ERR(q))
return q;
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- goto err_post_alloc;
- }
-
err = __xe_exec_queue_init(q);
- if (vm)
- xe_vm_unlock(vm);
if (err)
goto err_post_alloc;
@@ -634,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (xe_vm_in_preempt_fence_mode(vm)) {
q->lr.context = dma_fence_context_alloc(1);
- spin_lock_init(&q->lr.lock);
err = xe_vm_add_compute_exec_queue(vm, q);
if (XE_IOCTL_DBG(xe, err))
@@ -649,6 +657,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
goto kill_exec_queue;
args->exec_queue_id = id;
+ q->xef = xe_file_get(xef);
return 0;
@@ -762,6 +771,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
+ struct xe_file *xef;
struct xe_lrc *lrc;
u32 old_ts, new_ts;
@@ -773,6 +783,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
if (!q->vm || !q->vm->xef)
return;
+ xef = q->vm->xef;
+
/*
* Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by
@@ -783,7 +795,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
- q->run_ticks += (new_ts - old_ts) * q->width;
+ xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
}
void xe_exec_queue_kill(struct xe_exec_queue *q)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 201588ec33c3..f6ee0ae80fd6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
* a kernel object.
*/
struct xe_exec_queue {
+ /** @xef: Back pointer to xe file if this is user created exec queue */
+ struct xe_file *xef;
+
/** @gt: graphics tile this exec queue can submit to */
struct xe_gt *gt;
/**
@@ -123,8 +126,6 @@ struct xe_exec_queue {
u32 seqno;
/** @lr.link: link into VM's list of exec queues */
struct list_head link;
- /** @lr.lock: preemption fences lock */
- spinlock_t lock;
} lr;
/** @ops: submission backend exec queue operations */
@@ -139,10 +140,6 @@ struct xe_exec_queue {
* Protected by @vm's resv. Unused if @vm == NULL.
*/
u64 tlb_flush_seqno;
- /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
- u64 old_run_ticks;
- /** @run_ticks: hw engine class run time in ticks for this exec queue */
- u64 run_ticks;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc *lrc[];
};
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index f8239a13fa2b..2a612652bb13 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
struct xe_tile *tile = gt_to_tile(gt);
int ret;
- if (XE_WA(gt, 14018094691)) {
+ if (XE_WA(tile->primary_gt, 14018094691)) {
ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
@@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
- if (XE_WA(gt, 14018094691))
+ if (XE_WA(tile->primary_gt, 14018094691))
xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
if (ret)
@@ -437,7 +437,7 @@ out:
return ret;
}
-static void free_resources(struct drm_device *drm, void *arg)
+static void free_resources(void *arg)
{
struct xe_gsc *gsc = arg;
@@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
gsc->q = q;
gsc->wq = wq;
- err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
+ err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 31b2e64c70c6..b9bcbbe27705 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -11,6 +11,8 @@
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
+#include <generated/xe_wa_oob.h>
+
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gt_regs.h"
@@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
+static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ if (!xe_gt_is_media_type(gt)) {
+ xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg |= CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+ }
+
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
+static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
+{
+ u32 reg;
+ int err;
+
+ if (!XE_WA(gt, 16023588340))
+ return;
+
+ if (xe_gt_is_media_type(gt))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (WARN_ON(err))
+ return;
+
+ reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
+ reg &= ~CG_DIS_CNTLBUS;
+ xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
+
/**
* xe_gt_remove() - Clean up the GT structures before driver removal
* @gt: the GT object
@@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+
+ xe_gt_disable_host_l2_vram(gt);
}
static void gt_reset_worker(struct work_struct *w);
@@ -339,6 +388,7 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
xe_pcode_init(gt);
+ spin_lock_init(&gt->global_invl_lock);
return 0;
}
@@ -508,6 +558,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
@@ -643,6 +694,8 @@ static int do_gt_restart(struct xe_gt *gt)
xe_pat_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
+
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
@@ -796,6 +849,8 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_idle_disable_pg(gt);
+ xe_gt_disable_host_l2_vram(gt);
+
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 9292d5468868..b2a7fa55bd18 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w)
static void acc_queue_work_func(struct work_struct *w);
+static void pagefault_fini(void *arg)
+{
+ struct xe_gt *gt = arg;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (!xe->info.has_usm)
+ return;
+
+ destroy_workqueue(gt->usm.acc_wq);
+ destroy_workqueue(gt->usm.pf_wq);
+}
+
int xe_gt_pagefault_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
WQ_UNBOUND | WQ_HIGHPRI,
NUM_ACC_QUEUE);
- if (!gt->usm.acc_wq)
+ if (!gt->usm.acc_wq) {
+ destroy_workqueue(gt->usm.pf_wq);
return -ENOMEM;
+ }
- return 0;
+ return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt);
}
void xe_gt_pagefault_reset(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 4699b7836001..b6f0a7299c03 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
+ bool is_primary = !xe_gt_is_media_type(gt);
bool valid_ggtt, valid_ctxs, valid_dbs;
bool valid_any, valid_all;
@@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
valid_dbs = pf_get_vf_config_dbs(gt, vfid);
/* note that GuC doorbells are optional */
- valid_any = valid_ggtt || valid_ctxs || valid_dbs;
- valid_all = valid_ggtt && valid_ctxs;
+ valid_any = valid_ctxs || valid_dbs;
+ valid_all = valid_ctxs;
+
+ /* and GGTT/LMEM is configured on primary GT only */
+ valid_all = valid_all && valid_ggtt;
+ valid_any = valid_any || (valid_ggtt && is_primary);
if (IS_DGFX(xe)) {
bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid);
- valid_any = valid_any || valid_lmem;
+ valid_any = valid_any || (valid_lmem && is_primary);
valid_all = valid_all && valid_lmem;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 41e46a00c01e..8892d6c2291e 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key),
+ return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key),
vf_runtime_reg_cmp);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index d9359976ab8b..481d83d07367 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -13,10 +13,13 @@
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_mmio.h"
+#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_trace.h"
#include "regs/xe_guc_regs.h"
+#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
+
/*
* TLB inval depends on pending commands in the CT queue and then the real
* invalidation time. Double up the time to process full CT queue
@@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
return hw_tlb_timeout + 2 * delay;
}
+static void
+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+ trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
+ xe_gt_tlb_invalidation_fence_fini(fence);
+ dma_fence_signal(&fence->base);
+ if (!stack)
+ dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+ list_del(&fence->link);
+ __invalidation_fence_signal(xe, fence);
+}
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{
@@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
fence->seqno, gt->tlb_invalidation.seqno_recv);
- list_del(&fence->link);
fence->base.error = -ETIME;
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
+ invalidation_fence_signal(xe, fence);
}
if (!list_empty(&gt->tlb_invalidation.pending_fences))
queue_delayed_work(system_wq,
@@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
return 0;
}
-static void
-__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
- dma_fence_signal(&fence->base);
- dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- list_del(&fence->link);
- __invalidation_fence_signal(xe, fence);
-}
-
/**
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
* @gt: graphics tile
@@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{
struct xe_gt_tlb_invalidation_fence *fence, *next;
- struct xe_guc *guc = &gt->uc.guc;
int pending_seqno;
/*
@@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
else
pending_seqno = gt->tlb_invalidation.seqno - 1;
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link)
@@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
int seqno;
int ret;
+ xe_gt_assert(gt, fence);
+
/*
* XXX: The seqno algorithm relies on TLB invalidation being processed
* in order which they currently are, if that changes the algorithm will
@@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
mutex_lock(&guc->ct.lock);
seqno = gt->tlb_invalidation.seqno;
- if (fence) {
- fence->seqno = seqno;
- trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
- }
+ fence->seqno = seqno;
+ trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
action[1] = seqno;
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
G2H_LEN_DW_TLB_INVALIDATE, 1);
@@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
TLB_INVALIDATION_SEQNO_MAX;
if (!gt->tlb_invalidation.seqno)
gt->tlb_invalidation.seqno = 1;
- ret = seqno;
}
mutex_unlock(&guc->ct.lock);
@@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
/**
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
* @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion
*
* Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ * caller can use the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: 0 on success, negative error code on error
*/
-static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence)
{
u32 action[] = {
XE_GUC_ACTION_TLB_INVALIDATION,
@@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
};
- return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+ return send_tlb_invalidation(&gt->uc.guc, fence, action,
ARRAY_SIZE(action));
}
@@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) {
- int seqno;
-
- seqno = xe_gt_tlb_invalidation_guc(gt);
- if (seqno <= 0)
- return seqno;
+ struct xe_gt_tlb_invalidation_fence fence;
+ int ret;
+
+ xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+ ret = xe_gt_tlb_invalidation_guc(gt, &fence);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence);
+ return ret;
+ }
- xe_gt_tlb_invalidation_wait(gt, seqno);
+ xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
if (IS_SRIOV_VF(xe))
return 0;
@@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
*
* @gt: graphics tile
* @fence: invalidation fence which will be signal on TLB invalidation
- * completion, can be NULL
+ * completion
* @start: start address
* @end: end address
* @asid: address space id
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
*/
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
@@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
u32 action[MAX_TLB_INVALIDATION_LEN];
int len = 0;
+ xe_gt_assert(gt, fence);
+
/* Execlists not supported */
if (gt_to_xe(gt)->info.force_execlist) {
- if (fence)
- __invalidation_fence_signal(xe, fence);
-
+ __invalidation_fence_signal(xe, fence);
return 0;
}
@@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
* @vma: VMA to invalidate
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
*
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
*/
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
@@ -401,43 +404,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
}
/**
- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
- * @gt: graphics tile
- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
- *
- * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
- *
- * Return: 0 on success, -ETIME on TLB invalidation timeout
- */
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
-{
- struct xe_guc *guc = &gt->uc.guc;
- int ret;
-
- /* Execlists not supported */
- if (gt_to_xe(gt)->info.force_execlist)
- return 0;
-
- /*
- * XXX: See above, this algorithm only works if seqno are always in
- * order
- */
- ret = wait_event_timeout(guc->ct.wq,
- tlb_invalidation_seqno_past(gt, seqno),
- tlb_timeout_jiffies(gt));
- if (!ret) {
- struct drm_printer p = xe_gt_err_printer(gt);
-
- xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
- seqno, gt->tlb_invalidation.seqno_recv);
- xe_guc_ct_print(&guc->ct, &p, true);
- return -ETIME;
- }
-
- return 0;
-}
-
-/**
* xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
* @guc: guc
* @msg: message indicating TLB invalidation done
@@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
- /*
- * wake_up_all() and wait_event_timeout() already have the correct
- * barriers.
- */
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
- wake_up_all(&guc->ct.wq);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link) {
@@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
return 0;
}
+
+static const char *
+invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "invalidation_fence";
+}
+
+static const struct dma_fence_ops invalidation_fence_ops = {
+ .get_driver_name = invalidation_fence_get_driver_name,
+ .get_timeline_name = invalidation_fence_get_timeline_name,
+};
+
+/**
+ * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
+ * @gt: GT
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
+ * must be called if fence is not signaled.
+ */
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ bool stack)
+{
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+ spin_lock_irq(&gt->tlb_invalidation.lock);
+ dma_fence_init(&fence->base, &invalidation_fence_ops,
+ &gt->tlb_invalidation.lock,
+ dma_fence_context_alloc(1), 1);
+ spin_unlock_irq(&gt->tlb_invalidation.lock);
+ INIT_LIST_HEAD(&fence->link);
+ if (stack)
+ set_bit(FENCE_STACK_BIT, &fence->base.flags);
+ else
+ dma_fence_get(&fence->base);
+ fence->gt = gt;
+}
+
+/**
+ * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
+ * @fence: TLB invalidation fence to finalize
+ *
+ * Drop PM ref which fence took durinig init.
+ */
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ xe_pm_runtime_put(gt_to_xe(fence->gt));
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index bf3bebd9f985..a84065fa324c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
u64 start, u64 end, u32 asid);
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
+ struct xe_gt_tlb_invalidation_fence *fence,
+ bool stack);
+void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
+
+static inline void
+xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ dma_fence_wait(&fence->base, false);
+}
+
#endif /* _XE_GT_TLB_INVALIDATION_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
index 934c828efe31..de6e825e0851 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
@@ -8,6 +8,8 @@
#include <linux/dma-fence.h>
+struct xe_gt;
+
/**
* struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
*
@@ -17,6 +19,8 @@
struct xe_gt_tlb_invalidation_fence {
/** @base: dma fence base */
struct dma_fence base;
+ /** @gt: GT which fence belong to */
+ struct xe_gt *gt;
/** @link: link into list of pending tlb fences */
struct list_head link;
/** @seqno: seqno of TLB invalidation to signal fence one */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 6b5e0b45efb0..38a0d0e178c8 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -362,6 +362,12 @@ struct xe_gt {
*/
spinlock_t mcr_lock;
+ /**
+ * @global_invl_lock: protects the register for the duration
+ * of a global invalidation of l2 cache
+ */
+ spinlock_t global_invl_lock;
+
/** @wa_active: keep track of active workarounds */
struct {
/** @wa_active.gt: bitmap with active GT workarounds */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 7d2e937da1d8..64afc90ad2c5 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
state == XE_GUC_CT_STATE_STOPPED);
+ if (ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
ct->g2h_outstanding = 0;
ct->state = state;
@@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
{
xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
+ xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
+ (g2h_len && num_g2h));
if (g2h_len) {
lockdep_assert_held(&ct->fast_lock);
+ if (!ct->g2h_outstanding)
+ xe_pm_runtime_get_noresume(ct_to_xe(ct));
+
ct->ctbs.g2h.info.space -= g2h_len;
ct->g2h_outstanding += num_g2h;
}
@@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
ct->ctbs.g2h.info.space += g2h_len;
- --ct->g2h_outstanding;
+ if (!--ct->g2h_outstanding)
+ xe_pm_runtime_put(ct_to_xe(ct));
}
static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 8d7e7f4bbff7..77b0f0d8f729 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
free_submit_wq(guc);
}
-static void guc_submit_wedged_fini(struct drm_device *drm, void *arg)
+static void guc_submit_wedged_fini(void *arg)
{
struct xe_guc *guc = arg;
struct xe_exec_queue *q;
@@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
- err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm,
+ err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n");
@@ -1393,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
default:
XE_WARN_ON("Unknown message type");
}
+
+ xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data)));
}
static const struct drm_sched_backend_ops drm_sched_ops = {
@@ -1482,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q)
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
u32 opcode)
{
+ xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
+
INIT_LIST_HEAD(&msg->link);
msg->opcode = opcode;
msg->private_data = q;
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index 45a9789cf501..0b4f12be3692 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
- return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
+ return dev_name(fence->xe->drm.dev);
}
static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
- return fence->ctx->name;
+ return fence->name;
}
static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
- struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+ struct xe_device *xe = fence->xe;
u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
return dma_fence->error ||
@@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx,
struct xe_hw_fence *hw_fence =
container_of(fence, typeof(*hw_fence), dma);
- hw_fence->ctx = ctx;
+ hw_fence->xe = gt_to_xe(ctx->gt);
+ snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name);
hw_fence->seqno_map = seqno_map;
INIT_LIST_HEAD(&hw_fence->irq_link);
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
index b33c4956e8ea..364a61f4bfda 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -12,6 +12,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+struct xe_device;
struct xe_gt;
/**
@@ -61,8 +62,10 @@ struct xe_hw_fence_ctx {
struct xe_hw_fence {
/** @dma: base dma fence for hardware fence context */
struct dma_fence dma;
- /** @ctx: hardware fence context */
- struct xe_hw_fence_ctx *ctx;
+ /** @xe: Xe device for hw fence driver name */
+ struct xe_device *xe;
+ /** @name: name of hardware fence context */
+ char name[MAX_FENCE_NAME_LEN];
/** @seqno_map: I/O map for seqno */
struct iosys_map seqno_map;
/** @irq_link: Link in struct xe_hw_fence_irq.pending */
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 0c8ce09e5025..1faeca70900e 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va
reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0);
reg_val = xe_mmio_read32(hwmon->gt, rapl_limit);
if (reg_val & PKG_PWR_LIM_1_EN) {
+ drm_warn(&gt_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n");
ret = -EOPNOTSUPP;
- goto unlock;
}
+ goto unlock;
}
/* Computation in 64-bits to avoid overflow. Round to nearest. */
@@ -449,7 +450,7 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
{
return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP,
POWER_SETUP_SUBCOMMAND_WRITE_I1, 0),
- uval);
+ (uval & POWER_SETUP_I1_DATA_MASK));
}
static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 94ff62e1d95e..58121821f081 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
if (!snapshot)
return NULL;
+ if (lrc->bo && lrc->bo->vm)
+ xe_vm_get(lrc->bo->vm);
+
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
@@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
struct xe_bo *bo;
+ struct xe_vm *vm;
struct iosys_map src;
if (!snapshot)
return;
bo = snapshot->lrc_bo;
+ vm = bo->vm;
snapshot->lrc_bo = NULL;
snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
@@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
xe_bo_unlock(bo);
put_bo:
xe_bo_put(bo);
+ if (vm)
+ xe_vm_put(vm);
}
void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
@@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
return;
kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo)
+ if (snapshot->lrc_bo) {
+ struct xe_vm *vm;
+
+ vm = snapshot->lrc_bo->vm;
xe_bo_put(snapshot->lrc_bo);
+ if (vm)
+ xe_vm_put(vm);
+ }
kfree(snapshot);
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f92faad4b96d..aa68cac9fdf8 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -30,7 +30,8 @@ static void tiles_fini(void *arg)
int id;
for_each_tile(tile, xe, id)
- tile->mmio.regs = NULL;
+ if (tile != xe_device_get_root_tile(xe))
+ tile->mmio.regs = NULL;
}
int xe_mmio_probe_tiles(struct xe_device *xe)
@@ -91,9 +92,11 @@ add_mmio_ext:
static void mmio_fini(void *arg)
{
struct xe_device *xe = arg;
+ struct xe_tile *root_tile = xe_device_get_root_tile(xe);
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
xe->mmio.regs = NULL;
+ root_tile->mmio.regs = NULL;
}
int xe_mmio_init(struct xe_device *xe)
@@ -121,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe)
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
+static void mmio_flush_pending_writes(struct xe_gt *gt)
+{
+#define DUMMY_REG_OFFSET 0x130030
+ struct xe_tile *tile = gt_to_tile(gt);
+ int i;
+
+ if (tile->xe->info.platform != XE_LUNARLAKE)
+ return;
+
+ /* 4 dummy writes */
+ for (i = 0; i < 4; i++)
+ writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
+}
+
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u8 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
@@ -139,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u16 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
@@ -160,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u32 val;
+ /* Wa_15015404425 */
+ mmio_flush_pending_writes(gt);
+
if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
val = xe_gt_sriov_vf_read32(gt, reg);
else
diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c
index fcb584b42a7d..a78c92a44ec2 100644
--- a/drivers/gpu/drm/xe/xe_observation.c
+++ b/drivers/gpu/drm/xe/xe_observation.c
@@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- {}
};
/**
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 4ee32ee1cc88..722278cc23fc 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -7,6 +7,8 @@
#include <drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
@@ -15,6 +17,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_wa.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
if (GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
- xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
+ /* Wa_16023588340. XXX: Should use XE_WA */
+ if (GRAPHICS_VERx100(xe) == 2001)
+ xe->pat.n_entries = 28; /* Disable CLOS3 */
+ else
+ xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
+
xe->pat.idx[XE_CACHE_NONE] = 3;
xe->pat.idx[XE_CACHE_WT] = 15;
xe->pat.idx[XE_CACHE_WB] = 2;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index de3b5df65e48..9a3f618d22dc 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
+ xe_display_pm_suspend(xe, false);
+
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
goto err;
- xe_display_pm_suspend(xe, false);
-
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
if (err) {
@@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe)
xe_irq_resume(xe);
- xe_display_pm_resume(xe, false);
-
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
+ xe_display_pm_resume(xe, false);
+
err = xe_bo_restore_user(xe);
if (err)
goto err;
@@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
mutex_unlock(&xe->mem_access.vram_userfault.lock);
if (xe->d3cold.allowed) {
+ xe_display_pm_suspend(xe, true);
+
err = xe_bo_evict_all(xe);
if (err)
goto out;
- xe_display_pm_suspend(xe, true);
}
for_each_gt(gt, xe, id) {
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index e8b8ae5c6485..c453f45328b1 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
{
list_del_init(&pfence->link);
pfence->q = xe_exec_queue_get(q);
+ spin_lock_init(&pfence->lock);
dma_fence_init(&pfence->base, &preempt_fence_ops,
- &q->lr.lock, context, seqno);
+ &pfence->lock, context, seqno);
return &pfence->base;
}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
index b54b5c29b533..312c3372a49f 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -25,6 +25,8 @@ struct xe_preempt_fence {
struct xe_exec_queue *q;
/** @preempt_work: work struct which issues preemption */
struct work_struct preempt_work;
+ /** @lock: dma-fence fence lock */
+ spinlock_t lock;
/** @error: preempt fence is in error state */
int error;
};
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ade9e7a3a0ad..31a751a5de3f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1115,23 +1115,6 @@ struct invalidation_fence {
u32 asid;
};
-static const char *
-invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
-{
- return "xe";
-}
-
-static const char *
-invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
- return "invalidation_fence";
-}
-
-static const struct dma_fence_ops invalidation_fence_ops = {
- .get_driver_name = invalidation_fence_get_driver_name,
- .get_timeline_name = invalidation_fence_get_timeline_name,
-};
-
static void invalidation_fence_cb(struct dma_fence *fence,
struct dma_fence_cb *cb)
{
@@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt,
trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
- spin_lock_irq(&gt->tlb_invalidation.lock);
- dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
- &gt->tlb_invalidation.lock,
- dma_fence_context_alloc(1), 1);
- spin_unlock_irq(&gt->tlb_invalidation.lock);
-
- INIT_LIST_HEAD(&ifence->base.link);
+ xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
- dma_fence_get(&ifence->base.base); /* Ref for caller */
ifence->fence = fence;
ifence->gt = gt;
ifence->start = start;
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 02e28274282f..5efe83cc82ab 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe,
if (first == last)
bitmap_set(ctx->active_entries, first, 1);
else
- bitmap_set(ctx->active_entries, first, last - first + 2);
+ bitmap_set(ctx->active_entries, first, last - first + 1);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 44d534e362cd..9628f9deb3c0 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref)
struct xe_sched_job *job =
container_of(ref, struct xe_sched_job, refcount);
struct xe_device *xe = job_to_xe(job);
+ struct xe_exec_queue *q = job->q;
xe_sched_job_free_fences(job);
- xe_exec_queue_put(job->q);
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
job_free(job);
+ xe_exec_queue_put(q);
xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 2883d9aca404..e8d31e010860 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
u64 value)
{
struct xe_user_fence *ufence;
+ u64 __user *ptr = u64_to_user_ptr(addr);
+
+ if (!access_ok(ptr, sizeof(ptr)))
+ return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ufence->xe = xe;
kref_init(&ufence->refcount);
- ufence->addr = u64_to_user_ptr(addr);
+ ufence->addr = ptr;
ufence->value = value;
ufence->mm = current->mm;
mmgrab(ufence->mm);
@@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
} else {
sync->ufence = user_fence_create(xe, sync_in.addr,
sync_in.timeline_value);
- if (XE_IOCTL_DBG(xe, !sync->ufence))
- return -ENOMEM;
+ if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
+ return PTR_ERR(sync->ufence);
}
break;
@@ -263,7 +267,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
if (sync->fence)
dma_fence_put(sync->fence);
if (sync->chain_fence)
- dma_fence_put(&sync->chain_fence->base);
+ dma_fence_chain_free(sync->chain_fence);
if (sync->ufence)
user_fence_put(sync->ufence);
}
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index baba14fb1e32..01837f6f609f 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
TP_ARGS(fence),
TP_STRUCT__entry(
- __string(dev, __dev_name_gt(fence->ctx->gt))
+ __string(dev, __dev_name_xe(fence->xe))
__field(u64, ctx)
__field(u32, seqno)
__field(struct xe_hw_fence *, fence)
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index fe3779fdba2c..423b261ea743 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
} while (remaining_size);
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks))
+ if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
size = vres->base.size;
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 5b166fa03684..50e8fc49ba6c 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w)
XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm);
+
+ if (vm->xef)
+ xe_file_put(vm->xef);
+
kfree(vm);
}
@@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
}
args->vm_id = id;
- vm->xef = xef;
+ vm->xef = xe_file_get(xef);
/* Record BO memory for VM pagetable created against client */
for_each_tile(tile, xe, id)
@@ -3337,10 +3341,11 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_tile *tile;
- u32 tile_needs_invalidate = 0;
- int seqno[XE_MAX_TILES_PER_DEVICE];
+ struct xe_gt_tlb_invalidation_fence
+ fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
u8 id;
- int ret;
+ u32 fence_id = 0;
+ int ret = 0;
xe_assert(xe, !xe_vma_is_null(vma));
trace_xe_vma_invalidate(vma);
@@ -3365,29 +3370,43 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
for_each_tile(tile, xe, id) {
if (xe_pt_zap_ptes(tile, vma)) {
- tile_needs_invalidate |= BIT(id);
xe_device_wmb(xe);
- /*
- * FIXME: We potentially need to invalidate multiple
- * GTs within the tile
- */
- seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
- if (seqno[id] < 0)
- return seqno[id];
- }
- }
+ xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+ &fence[fence_id],
+ true);
+
+ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
+ &fence[fence_id], vma);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ goto wait;
+ }
+ ++fence_id;
- for_each_tile(tile, xe, id) {
- if (tile_needs_invalidate & BIT(id)) {
- ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
- if (ret < 0)
- return ret;
+ if (!tile->media_gt)
+ continue;
+
+ xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+ &fence[fence_id],
+ true);
+
+ ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
+ &fence[fence_id], vma);
+ if (ret < 0) {
+ xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ goto wait;
+ }
+ ++fence_id;
}
}
+wait:
+ for (id = 0; id < fence_id; ++id)
+ xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
vma->tile_invalidated = vma->tile_mask;
- return 0;
+ return ret;
}
struct xe_vm_snapshot {
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index c7bf0862b231..e648265d081b 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
},
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
/* Xe2_HPG */
@@ -538,6 +542,20 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
+ { XE_RTP_NAME("14021821874"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT))
+ },
+
+ /* Xe2_LPM */
+
+ { XE_RTP_NAME("16021639441"),
+ XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+ GHWSP_CSB_REPORT_DIS |
+ PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+ XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+ },
/* Xe2_HPM */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 26066beb4f6f..08f7336881e3 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -29,3 +29,4 @@
13011645652 GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
+16023588340 GRAPHICS_VERSION(2001)